perf tests: Add APX and other new instructions to x86 instruction decoder test

Add samples of APX and other new instructions to the 'x86 instruction
decoder - new instructions' test.

Note that the test is available only if the perf tool has been built
with EXTRA_TESTS=1.

Example:

  $ make EXTRA_TESTS=1 -C tools/perf
  $ tools/perf/perf test -F -v 'new ins' |& grep -i 'jmpabs\|popp\|pushp'
  Decoded ok: d5 00 a1 ef cd ab 90 78 56 34 12    jmpabs $0x1234567890abcdef
  Decoded ok: d5 08 53                    pushp  %rbx
  Decoded ok: d5 18 50                    pushp  %r16
  Decoded ok: d5 19 57                    pushp  %r31
  Decoded ok: d5 19 5f                    popp   %r31
  Decoded ok: d5 18 58                    popp   %r16
  Decoded ok: d5 08 5b                    popp   %rbx
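
For reference, 0xd5 is the APX REX2 prefix byte, and the payload byte
that follows it carries the extra register-number bits that let
pushp/popp reach %r16-%r31. Below is a minimal sketch of recovering the
register number, assuming the REX2 payload bit layout documented in the
Intel APX specification ([M0 R4 X4 B4 W R3 X3 B3], high bit first); the
helper name is made up for illustration and is not part of the patch:

  /* Hypothetical helper: full push/pop register number from REX2 bytes. */
  static int rex2_pushp_reg(const unsigned char *p)
  {
          if (p[0] != 0xd5)
                  return -1;                /* not a REX2 prefix */
          return (((p[1] >> 4) & 1) << 4) | /* payload bit 4: B4 */
                 (((p[1] >> 0) & 1) << 3) | /* payload bit 0: B3 */
                 (p[2] & 7);                /* low bits from the opcode */
  }

For example, "d5 19 5f" gives (1 << 4) | (1 << 3) | 7 = 31, matching
"popp %r31" above, and "d5 08 5b" gives 3, i.e. %rbx.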

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Chang S. Bae <chang.seok.bae@intel.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Nikolay Borisov <nik.borisov@suse.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: x86@kernel.org
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20240502105853.5338-11-adrian.hunter@intel.com

@@ -3107,6 +3107,122 @@
"62 f5 7c 08 2e ca \tvucomish %xmm2,%xmm1",},
{{0x62, 0xf5, 0x7c, 0x08, 0x2e, 0x8c, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 11, 0, "", "",
"62 f5 7c 08 2e 8c c8 78 56 34 12 \tvucomish 0x12345678(%eax,%ecx,8),%xmm1",},
{{0xf3, 0x0f, 0x38, 0xdc, 0xd1, }, 5, 0, "", "",
"f3 0f 38 dc d1 \tloadiwkey %xmm1,%xmm2",},
{{0xf3, 0x0f, 0x38, 0xfa, 0xd0, }, 5, 0, "", "",
"f3 0f 38 fa d0 \tencodekey128 %eax,%edx",},
{{0xf3, 0x0f, 0x38, 0xfb, 0xd0, }, 5, 0, "", "",
"f3 0f 38 fb d0 \tencodekey256 %eax,%edx",},
{{0xf3, 0x0f, 0x38, 0xdc, 0x5a, 0x77, }, 6, 0, "", "",
"f3 0f 38 dc 5a 77 \taesenc128kl 0x77(%edx),%xmm3",},
{{0xf3, 0x0f, 0x38, 0xde, 0x5a, 0x77, }, 6, 0, "", "",
"f3 0f 38 de 5a 77 \taesenc256kl 0x77(%edx),%xmm3",},
{{0xf3, 0x0f, 0x38, 0xdd, 0x5a, 0x77, }, 6, 0, "", "",
"f3 0f 38 dd 5a 77 \taesdec128kl 0x77(%edx),%xmm3",},
{{0xf3, 0x0f, 0x38, 0xdf, 0x5a, 0x77, }, 6, 0, "", "",
"f3 0f 38 df 5a 77 \taesdec256kl 0x77(%edx),%xmm3",},
{{0xf3, 0x0f, 0x38, 0xd8, 0x42, 0x77, }, 6, 0, "", "",
"f3 0f 38 d8 42 77 \taesencwide128kl 0x77(%edx)",},
{{0xf3, 0x0f, 0x38, 0xd8, 0x52, 0x77, }, 6, 0, "", "",
"f3 0f 38 d8 52 77 \taesencwide256kl 0x77(%edx)",},
{{0xf3, 0x0f, 0x38, 0xd8, 0x4a, 0x77, }, 6, 0, "", "",
"f3 0f 38 d8 4a 77 \taesdecwide128kl 0x77(%edx)",},
{{0xf3, 0x0f, 0x38, 0xd8, 0x5a, 0x77, }, 6, 0, "", "",
"f3 0f 38 d8 5a 77 \taesdecwide256kl 0x77(%edx)",},
{{0x0f, 0x38, 0xfc, 0x08, }, 4, 0, "", "",
"0f 38 fc 08 \taadd %ecx,(%eax)",},
{{0x0f, 0x38, 0xfc, 0x15, 0x78, 0x56, 0x34, 0x12, }, 8, 0, "", "",
"0f 38 fc 15 78 56 34 12 \taadd %edx,0x12345678",},
{{0x0f, 0x38, 0xfc, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
"0f 38 fc 94 c8 78 56 34 12 \taadd %edx,0x12345678(%eax,%ecx,8)",},
{{0x66, 0x0f, 0x38, 0xfc, 0x08, }, 5, 0, "", "",
"66 0f 38 fc 08 \taand %ecx,(%eax)",},
{{0x66, 0x0f, 0x38, 0xfc, 0x15, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
"66 0f 38 fc 15 78 56 34 12 \taand %edx,0x12345678",},
{{0x66, 0x0f, 0x38, 0xfc, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
"66 0f 38 fc 94 c8 78 56 34 12 \taand %edx,0x12345678(%eax,%ecx,8)",},
{{0xf2, 0x0f, 0x38, 0xfc, 0x08, }, 5, 0, "", "",
"f2 0f 38 fc 08 \taor %ecx,(%eax)",},
{{0xf2, 0x0f, 0x38, 0xfc, 0x15, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
"f2 0f 38 fc 15 78 56 34 12 \taor %edx,0x12345678",},
{{0xf2, 0x0f, 0x38, 0xfc, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
"f2 0f 38 fc 94 c8 78 56 34 12 \taor %edx,0x12345678(%eax,%ecx,8)",},
{{0xf3, 0x0f, 0x38, 0xfc, 0x08, }, 5, 0, "", "",
"f3 0f 38 fc 08 \taxor %ecx,(%eax)",},
{{0xf3, 0x0f, 0x38, 0xfc, 0x15, 0x78, 0x56, 0x34, 0x12, }, 9, 0, "", "",
"f3 0f 38 fc 15 78 56 34 12 \taxor %edx,0x12345678",},
{{0xf3, 0x0f, 0x38, 0xfc, 0x94, 0xc8, 0x78, 0x56, 0x34, 0x12, }, 10, 0, "", "",
"f3 0f 38 fc 94 c8 78 56 34 12 \taxor %edx,0x12345678(%eax,%ecx,8)",},
{{0xc4, 0xe2, 0x7a, 0xb1, 0x31, }, 5, 0, "", "",
"c4 e2 7a b1 31 \tvbcstnebf162ps (%ecx),%xmm6",},
{{0xc4, 0xe2, 0x79, 0xb1, 0x31, }, 5, 0, "", "",
"c4 e2 79 b1 31 \tvbcstnesh2ps (%ecx),%xmm6",},
{{0xc4, 0xe2, 0x7a, 0xb0, 0x31, }, 5, 0, "", "",
"c4 e2 7a b0 31 \tvcvtneebf162ps (%ecx),%xmm6",},
{{0xc4, 0xe2, 0x79, 0xb0, 0x31, }, 5, 0, "", "",
"c4 e2 79 b0 31 \tvcvtneeph2ps (%ecx),%xmm6",},
{{0xc4, 0xe2, 0x7b, 0xb0, 0x31, }, 5, 0, "", "",
"c4 e2 7b b0 31 \tvcvtneobf162ps (%ecx),%xmm6",},
{{0xc4, 0xe2, 0x78, 0xb0, 0x31, }, 5, 0, "", "",
"c4 e2 78 b0 31 \tvcvtneoph2ps (%ecx),%xmm6",},
{{0x62, 0xf2, 0x7e, 0x08, 0x72, 0xf1, }, 6, 0, "", "",
"62 f2 7e 08 72 f1 \tvcvtneps2bf16 %xmm1,%xmm6",},
{{0xc4, 0xe2, 0x6b, 0x50, 0xd9, }, 5, 0, "", "",
"c4 e2 6b 50 d9 \tvpdpbssd %xmm1,%xmm2,%xmm3",},
{{0xc4, 0xe2, 0x6b, 0x51, 0xd9, }, 5, 0, "", "",
"c4 e2 6b 51 d9 \tvpdpbssds %xmm1,%xmm2,%xmm3",},
{{0xc4, 0xe2, 0x6a, 0x50, 0xd9, }, 5, 0, "", "",
"c4 e2 6a 50 d9 \tvpdpbsud %xmm1,%xmm2,%xmm3",},
{{0xc4, 0xe2, 0x6a, 0x51, 0xd9, }, 5, 0, "", "",
"c4 e2 6a 51 d9 \tvpdpbsuds %xmm1,%xmm2,%xmm3",},
{{0xc4, 0xe2, 0x68, 0x50, 0xd9, }, 5, 0, "", "",
"c4 e2 68 50 d9 \tvpdpbuud %xmm1,%xmm2,%xmm3",},
{{0xc4, 0xe2, 0x68, 0x51, 0xd9, }, 5, 0, "", "",
"c4 e2 68 51 d9 \tvpdpbuuds %xmm1,%xmm2,%xmm3",},
{{0xc4, 0xe2, 0x6a, 0xd2, 0xd9, }, 5, 0, "", "",
"c4 e2 6a d2 d9 \tvpdpwsud %xmm1,%xmm2,%xmm3",},
{{0xc4, 0xe2, 0x6a, 0xd3, 0xd9, }, 5, 0, "", "",
"c4 e2 6a d3 d9 \tvpdpwsuds %xmm1,%xmm2,%xmm3",},
{{0xc4, 0xe2, 0x69, 0xd2, 0xd9, }, 5, 0, "", "",
"c4 e2 69 d2 d9 \tvpdpwusd %xmm1,%xmm2,%xmm3",},
{{0xc4, 0xe2, 0x69, 0xd3, 0xd9, }, 5, 0, "", "",
"c4 e2 69 d3 d9 \tvpdpwusds %xmm1,%xmm2,%xmm3",},
{{0xc4, 0xe2, 0x68, 0xd2, 0xd9, }, 5, 0, "", "",
"c4 e2 68 d2 d9 \tvpdpwuud %xmm1,%xmm2,%xmm3",},
{{0xc4, 0xe2, 0x68, 0xd3, 0xd9, }, 5, 0, "", "",
"c4 e2 68 d3 d9 \tvpdpwuuds %xmm1,%xmm2,%xmm3",},
{{0x62, 0xf2, 0xed, 0x08, 0xb5, 0xd9, }, 6, 0, "", "",
"62 f2 ed 08 b5 d9 \tvpmadd52huq %xmm1,%xmm2,%xmm3",},
{{0x62, 0xf2, 0xed, 0x08, 0xb4, 0xd9, }, 6, 0, "", "",
"62 f2 ed 08 b4 d9 \tvpmadd52luq %xmm1,%xmm2,%xmm3",},
{{0xc4, 0xe2, 0x7f, 0xcc, 0xd1, }, 5, 0, "", "",
"c4 e2 7f cc d1 \tvsha512msg1 %xmm1,%ymm2",},
{{0xc4, 0xe2, 0x7f, 0xcd, 0xd1, }, 5, 0, "", "",
"c4 e2 7f cd d1 \tvsha512msg2 %ymm1,%ymm2",},
{{0xc4, 0xe2, 0x6f, 0xcb, 0xd9, }, 5, 0, "", "",
"c4 e2 6f cb d9 \tvsha512rnds2 %xmm1,%ymm2,%ymm3",},
{{0xc4, 0xe2, 0x68, 0xda, 0xd9, }, 5, 0, "", "",
"c4 e2 68 da d9 \tvsm3msg1 %xmm1,%xmm2,%xmm3",},
{{0xc4, 0xe2, 0x69, 0xda, 0xd9, }, 5, 0, "", "",
"c4 e2 69 da d9 \tvsm3msg2 %xmm1,%xmm2,%xmm3",},
{{0xc4, 0xe3, 0x69, 0xde, 0xd9, 0xa1, }, 6, 0, "", "",
"c4 e3 69 de d9 a1 \tvsm3rnds2 $0xa1,%xmm1,%xmm2,%xmm3",},
{{0xc4, 0xe2, 0x6a, 0xda, 0xd9, }, 5, 0, "", "",
"c4 e2 6a da d9 \tvsm4key4 %xmm1,%xmm2,%xmm3",},
{{0xc4, 0xe2, 0x6b, 0xda, 0xd9, }, 5, 0, "", "",
"c4 e2 6b da d9 \tvsm4rnds4 %xmm1,%xmm2,%xmm3",},
{{0x0f, 0x0d, 0x00, }, 3, 0, "", "",
"0f 0d 00 \tprefetch (%eax)",},
{{0x0f, 0x18, 0x08, }, 3, 0, "", "",
"0f 18 08 \tprefetcht0 (%eax)",},
{{0x0f, 0x18, 0x10, }, 3, 0, "", "",
"0f 18 10 \tprefetcht1 (%eax)",},
{{0x0f, 0x18, 0x18, }, 3, 0, "", "",
"0f 18 18 \tprefetcht2 (%eax)",},
{{0x0f, 0x18, 0x00, }, 3, 0, "", "",
"0f 18 00 \tprefetchnta (%eax)",},
{{0x0f, 0x01, 0xc6, }, 3, 0, "", "",
"0f 01 c6 \twrmsrns",},
{{0xf3, 0x0f, 0x3a, 0xf0, 0xc0, 0x00, }, 6, 0, "", "",
"f3 0f 3a f0 c0 00 \threset $0x0",},
{{0x0f, 0x01, 0xe8, }, 3, 0, "", "",
"0f 01 e8 \tserialize",},

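Each entry above pairs the raw instruction bytes with the byte count the
decoder is expected to consume and the objdump line the bytes came from.
Roughly, the consuming struct looks like the sketch below; the field
names are approximate and the real definition lives in
tools/perf/arch/x86/tests/insn-x86.c:

  struct test_data {
          unsigned char data[16];          /* raw instruction bytes, zero-padded */
          int expected_length;             /* bytes the decoder must consume */
          int expected_rel;                /* expected relative displacement, if any */
          const char *expected_op_str;     /* expected operand annotation, or "" */
          const char *expected_branch_str; /* expected branch annotation, or "" */
          const char *asm_rep;             /* "hex bytes \tmnemonic operands" */
  };
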
[File diff suppressed because it is too large]

@@ -2628,6 +2628,512 @@ int main(void)
asm volatile("vucomish 0x12345678(%rax,%rcx,8), %xmm1");
asm volatile("vucomish 0x12345678(%eax,%ecx,8), %xmm1");
/* Key Locker */
asm volatile("loadiwkey %xmm1, %xmm2");
asm volatile("encodekey128 %eax, %edx");
asm volatile("encodekey256 %eax, %edx");
asm volatile("aesenc128kl 0x77(%rdx), %xmm3");
asm volatile("aesenc256kl 0x77(%rdx), %xmm3");
asm volatile("aesdec128kl 0x77(%rdx), %xmm3");
asm volatile("aesdec256kl 0x77(%rdx), %xmm3");
asm volatile("aesencwide128kl 0x77(%rdx)");
asm volatile("aesencwide256kl 0x77(%rdx)");
asm volatile("aesdecwide128kl 0x77(%rdx)");
asm volatile("aesdecwide256kl 0x77(%rdx)");
/* Remote Atomic Operations */
asm volatile("aadd %ecx,(%rax)");
asm volatile("aadd %edx,(%r8)");
asm volatile("aadd %edx,0x12345678(%rax,%rcx,8)");
asm volatile("aadd %edx,0x12345678(%r8,%rcx,8)");
asm volatile("aadd %rcx,(%rax)");
asm volatile("aadd %rdx,(%r8)");
asm volatile("aadd %rdx,(0x12345678)");
asm volatile("aadd %rdx,0x12345678(%rax,%rcx,8)");
asm volatile("aadd %rdx,0x12345678(%r8,%rcx,8)");
asm volatile("aand %ecx,(%rax)");
asm volatile("aand %edx,(%r8)");
asm volatile("aand %edx,0x12345678(%rax,%rcx,8)");
asm volatile("aand %edx,0x12345678(%r8,%rcx,8)");
asm volatile("aand %rcx,(%rax)");
asm volatile("aand %rdx,(%r8)");
asm volatile("aand %rdx,(0x12345678)");
asm volatile("aand %rdx,0x12345678(%rax,%rcx,8)");
asm volatile("aand %rdx,0x12345678(%r8,%rcx,8)");
asm volatile("aor %ecx,(%rax)");
asm volatile("aor %edx,(%r8)");
asm volatile("aor %edx,0x12345678(%rax,%rcx,8)");
asm volatile("aor %edx,0x12345678(%r8,%rcx,8)");
asm volatile("aor %rcx,(%rax)");
asm volatile("aor %rdx,(%r8)");
asm volatile("aor %rdx,(0x12345678)");
asm volatile("aor %rdx,0x12345678(%rax,%rcx,8)");
asm volatile("aor %rdx,0x12345678(%r8,%rcx,8)");
asm volatile("axor %ecx,(%rax)");
asm volatile("axor %edx,(%r8)");
asm volatile("axor %edx,0x12345678(%rax,%rcx,8)");
asm volatile("axor %edx,0x12345678(%r8,%rcx,8)");
asm volatile("axor %rcx,(%rax)");
asm volatile("axor %rdx,(%r8)");
asm volatile("axor %rdx,(0x12345678)");
asm volatile("axor %rdx,0x12345678(%rax,%rcx,8)");
asm volatile("axor %rdx,0x12345678(%r8,%rcx,8)");
/* VEX CMPxxXADD */
asm volatile("cmpbexadd %ebx,%ecx,(%r9)");
asm volatile("cmpbxadd %ebx,%ecx,(%r9)");
asm volatile("cmplexadd %ebx,%ecx,(%r9)");
asm volatile("cmplxadd %ebx,%ecx,(%r9)");
asm volatile("cmpnbexadd %ebx,%ecx,(%r9)");
asm volatile("cmpnbxadd %ebx,%ecx,(%r9)");
asm volatile("cmpnlexadd %ebx,%ecx,(%r9)");
asm volatile("cmpnlxadd %ebx,%ecx,(%r9)");
asm volatile("cmpnoxadd %ebx,%ecx,(%r9)");
asm volatile("cmpnpxadd %ebx,%ecx,(%r9)");
asm volatile("cmpnsxadd %ebx,%ecx,(%r9)");
asm volatile("cmpnzxadd %ebx,%ecx,(%r9)");
asm volatile("cmpoxadd %ebx,%ecx,(%r9)");
asm volatile("cmppxadd %ebx,%ecx,(%r9)");
asm volatile("cmpsxadd %ebx,%ecx,(%r9)");
asm volatile("cmpzxadd %ebx,%ecx,(%r9)");
/* Pre-fetch */
asm volatile("prefetch (%rax)");
asm volatile("prefetcht0 (%rax)");
asm volatile("prefetcht1 (%rax)");
asm volatile("prefetcht2 (%rax)");
asm volatile("prefetchnta (%rax)");
asm volatile("prefetchit0 0x12345678(%rip)");
asm volatile("prefetchit1 0x12345678(%rip)");
/* MSR List */
asm volatile("rdmsrlist");
asm volatile("wrmsrlist");
/* User Read/Write MSR */
asm volatile("urdmsr %rdx,%rax");
asm volatile("urdmsr %rdx,%r22");
asm volatile("urdmsr $0x7f,%r12");
asm volatile("uwrmsr %rax,%rdx");
asm volatile("uwrmsr %r22,%rdx");
asm volatile("uwrmsr %r12,$0x7f");
/* AVX NE Convert */
asm volatile("vbcstnebf162ps (%rcx),%xmm6");
asm volatile("vbcstnesh2ps (%rcx),%xmm6");
asm volatile("vcvtneebf162ps (%rcx),%xmm6");
asm volatile("vcvtneeph2ps (%rcx),%xmm6");
asm volatile("vcvtneobf162ps (%rcx),%xmm6");
asm volatile("vcvtneoph2ps (%rcx),%xmm6");
asm volatile("vcvtneps2bf16 %xmm1,%xmm6");
/* FRED */
asm volatile("erets"); /* Expecting: erets indirect 0 */
asm volatile("eretu"); /* Expecting: eretu indirect 0 */
/* AMX Complex */
asm volatile("tcmmimfp16ps %tmm1,%tmm2,%tmm3");
asm volatile("tcmmrlfp16ps %tmm1,%tmm2,%tmm3");
/* AMX FP16 */
asm volatile("tdpfp16ps %tmm1,%tmm2,%tmm3");
/* REX2 */
asm volatile("test $0x5, %r18b");
asm volatile("test $0x5, %r18d");
asm volatile("test $0x5, %r18");
asm volatile("test $0x5, %r18w");
asm volatile("imull %eax, %r14d");
asm volatile("imull %eax, %r17d");
asm volatile("punpckldq (%r18), %mm2");
asm volatile("leal (%rax), %r16d");
asm volatile("leal (%rax), %r31d");
asm volatile("leal (,%r16), %eax");
asm volatile("leal (,%r31), %eax");
asm volatile("leal (%r16), %eax");
asm volatile("leal (%r31), %eax");
asm volatile("leaq (%rax), %r15");
asm volatile("leaq (%rax), %r16");
asm volatile("leaq (%r15), %rax");
asm volatile("leaq (%r16), %rax");
asm volatile("leaq (,%r15), %rax");
asm volatile("leaq (,%r16), %rax");
asm volatile("add (%r16), %r8");
asm volatile("add (%r16), %r15");
asm volatile("mov (,%r9), %r16");
asm volatile("mov (,%r14), %r16");
asm volatile("sub (%r10), %r31");
asm volatile("sub (%r13), %r31");
asm volatile("leal 1(%r16, %r21), %eax");
asm volatile("leal 1(%r16, %r26), %r31d");
asm volatile("leal 129(%r21, %r9), %eax");
asm volatile("leal 129(%r26, %r9), %r31d");
/*
* Have to use .byte for jmpabs because gas does not support the
* mnemonic for some reason, but then it also gets the source line wrong
* with .byte, so the following is a workaround.
*/
asm volatile(""); /* Expecting: jmp indirect 0 */
asm volatile(".byte 0xd5, 0x00, 0xa1, 0xef, 0xcd, 0xab, 0x90, 0x78, 0x56, 0x34, 0x12");
asm volatile("pushp %rbx");
asm volatile("pushp %r16");
asm volatile("pushp %r31");
asm volatile("popp %r31");
asm volatile("popp %r16");
asm volatile("popp %rbx");
/* APX */
asm volatile("bextr %r25d,%edx,%r10d");
asm volatile("bextr %r25d,0x123(%r31,%rax,4),%edx");
asm volatile("bextr %r31,%r15,%r11");
asm volatile("bextr %r31,0x123(%r31,%rax,4),%r15");
asm volatile("blsi %r25d,%edx");
asm volatile("blsi %r31,%r15");
asm volatile("blsi 0x123(%r31,%rax,4),%r25d");
asm volatile("blsi 0x123(%r31,%rax,4),%r31");
asm volatile("blsmsk %r25d,%edx");
asm volatile("blsmsk %r31,%r15");
asm volatile("blsmsk 0x123(%r31,%rax,4),%r25d");
asm volatile("blsmsk 0x123(%r31,%rax,4),%r31");
asm volatile("blsr %r25d,%edx");
asm volatile("blsr %r31,%r15");
asm volatile("blsr 0x123(%r31,%rax,4),%r25d");
asm volatile("blsr 0x123(%r31,%rax,4),%r31");
asm volatile("bzhi %r25d,%edx,%r10d");
asm volatile("bzhi %r25d,0x123(%r31,%rax,4),%edx");
asm volatile("bzhi %r31,%r15,%r11");
asm volatile("bzhi %r31,0x123(%r31,%rax,4),%r15");
asm volatile("cmpbexadd %r25d,%edx,0x123(%r31,%rax,4)");
asm volatile("cmpbexadd %r31,%r15,0x123(%r31,%rax,4)");
asm volatile("cmpbxadd %r25d,%edx,0x123(%r31,%rax,4)");
asm volatile("cmpbxadd %r31,%r15,0x123(%r31,%rax,4)");
asm volatile("cmplxadd %r25d,%edx,0x123(%r31,%rax,4)");
asm volatile("cmplxadd %r31,%r15,0x123(%r31,%rax,4)");
asm volatile("cmpnbexadd %r25d,%edx,0x123(%r31,%rax,4)");
asm volatile("cmpnbexadd %r31,%r15,0x123(%r31,%rax,4)");
asm volatile("cmpnbxadd %r25d,%edx,0x123(%r31,%rax,4)");
asm volatile("cmpnbxadd %r31,%r15,0x123(%r31,%rax,4)");
asm volatile("cmpnlexadd %r25d,%edx,0x123(%r31,%rax,4)");
asm volatile("cmpnlexadd %r31,%r15,0x123(%r31,%rax,4)");
asm volatile("cmpnlxadd %r25d,%edx,0x123(%r31,%rax,4)");
asm volatile("cmpnlxadd %r31,%r15,0x123(%r31,%rax,4)");
asm volatile("cmpnoxadd %r25d,%edx,0x123(%r31,%rax,4)");
asm volatile("cmpnoxadd %r31,%r15,0x123(%r31,%rax,4)");
asm volatile("cmpnpxadd %r25d,%edx,0x123(%r31,%rax,4)");
asm volatile("cmpnpxadd %r31,%r15,0x123(%r31,%rax,4)");
asm volatile("cmpnsxadd %r25d,%edx,0x123(%r31,%rax,4)");
asm volatile("cmpnsxadd %r31,%r15,0x123(%r31,%rax,4)");
asm volatile("cmpnzxadd %r25d,%edx,0x123(%r31,%rax,4)");
asm volatile("cmpnzxadd %r31,%r15,0x123(%r31,%rax,4)");
asm volatile("cmpoxadd %r25d,%edx,0x123(%r31,%rax,4)");
asm volatile("cmpoxadd %r31,%r15,0x123(%r31,%rax,4)");
asm volatile("cmppxadd %r25d,%edx,0x123(%r31,%rax,4)");
asm volatile("cmppxadd %r31,%r15,0x123(%r31,%rax,4)");
asm volatile("cmpsxadd %r25d,%edx,0x123(%r31,%rax,4)");
asm volatile("cmpsxadd %r31,%r15,0x123(%r31,%rax,4)");
asm volatile("cmpzxadd %r25d,%edx,0x123(%r31,%rax,4)");
asm volatile("cmpzxadd %r31,%r15,0x123(%r31,%rax,4)");
asm volatile("crc32q %r31, %r22");
asm volatile("crc32q (%r31), %r22");
asm volatile("crc32b %r19b, %r17");
asm volatile("crc32b %r19b, %r21d");
asm volatile("crc32b (%r19),%ebx");
asm volatile("crc32l %r31d, %r23d");
asm volatile("crc32l (%r31), %r23d");
asm volatile("crc32w %r31w, %r21d");
asm volatile("crc32w (%r31),%r21d");
asm volatile("crc32 %rax, %r18");
asm volatile("enqcmd 0x123(%r31d,%eax,4),%r25d");
asm volatile("enqcmd 0x123(%r31,%rax,4),%r31");
asm volatile("enqcmds 0x123(%r31d,%eax,4),%r25d");
asm volatile("enqcmds 0x123(%r31,%rax,4),%r31");
asm volatile("invept 0x123(%r31,%rax,4),%r31");
asm volatile("invpcid 0x123(%r31,%rax,4),%r31");
asm volatile("invvpid 0x123(%r31,%rax,4),%r31");
asm volatile("kmovb %k5,%r25d");
asm volatile("kmovb %k5,0x123(%r31,%rax,4)");
asm volatile("kmovb %r25d,%k5");
asm volatile("kmovb 0x123(%r31,%rax,4),%k5");
asm volatile("kmovd %k5,%r25d");
asm volatile("kmovd %k5,0x123(%r31,%rax,4)");
asm volatile("kmovd %r25d,%k5");
asm volatile("kmovd 0x123(%r31,%rax,4),%k5");
asm volatile("kmovq %k5,%r31");
asm volatile("kmovq %k5,0x123(%r31,%rax,4)");
asm volatile("kmovq %r31,%k5");
asm volatile("kmovq 0x123(%r31,%rax,4),%k5");
asm volatile("kmovw %k5,%r25d");
asm volatile("kmovw %k5,0x123(%r31,%rax,4)");
asm volatile("kmovw %r25d,%k5");
asm volatile("kmovw 0x123(%r31,%rax,4),%k5");
asm volatile("ldtilecfg 0x123(%r31,%rax,4)");
asm volatile("movbe %r18w,%ax");
asm volatile("movbe %r15w,%ax");
asm volatile("movbe %r18w,0x123(%r16,%rax,4)");
asm volatile("movbe %r18w,0x123(%r31,%rax,4)");
asm volatile("movbe %r25d,%edx");
asm volatile("movbe %r15d,%edx");
asm volatile("movbe %r25d,0x123(%r16,%rax,4)");
asm volatile("movbe %r31,%r15");
asm volatile("movbe %r8,%r15");
asm volatile("movbe %r31,0x123(%r16,%rax,4)");
asm volatile("movbe %r31,0x123(%r31,%rax,4)");
asm volatile("movbe 0x123(%r16,%rax,4),%r31");
asm volatile("movbe 0x123(%r31,%rax,4),%r18w");
asm volatile("movbe 0x123(%r31,%rax,4),%r25d");
asm volatile("movdir64b 0x123(%r31d,%eax,4),%r25d");
asm volatile("movdir64b 0x123(%r31,%rax,4),%r31");
asm volatile("movdiri %r25d,0x123(%r31,%rax,4)");
asm volatile("movdiri %r31,0x123(%r31,%rax,4)");
asm volatile("pdep %r25d,%edx,%r10d");
asm volatile("pdep %r31,%r15,%r11");
asm volatile("pdep 0x123(%r31,%rax,4),%r25d,%edx");
asm volatile("pdep 0x123(%r31,%rax,4),%r31,%r15");
asm volatile("pext %r25d,%edx,%r10d");
asm volatile("pext %r31,%r15,%r11");
asm volatile("pext 0x123(%r31,%rax,4),%r25d,%edx");
asm volatile("pext 0x123(%r31,%rax,4),%r31,%r15");
asm volatile("shlx %r25d,%edx,%r10d");
asm volatile("shlx %r25d,0x123(%r31,%rax,4),%edx");
asm volatile("shlx %r31,%r15,%r11");
asm volatile("shlx %r31,0x123(%r31,%rax,4),%r15");
asm volatile("shrx %r25d,%edx,%r10d");
asm volatile("shrx %r25d,0x123(%r31,%rax,4),%edx");
asm volatile("shrx %r31,%r15,%r11");
asm volatile("shrx %r31,0x123(%r31,%rax,4),%r15");
asm volatile("sttilecfg 0x123(%r31,%rax,4)");
asm volatile("tileloadd 0x123(%r31,%rax,4),%tmm6");
asm volatile("tileloaddt1 0x123(%r31,%rax,4),%tmm6");
asm volatile("tilestored %tmm6,0x123(%r31,%rax,4)");
asm volatile("vbroadcastf128 (%r16),%ymm3");
asm volatile("vbroadcasti128 (%r16),%ymm3");
asm volatile("vextractf128 $1,%ymm3,(%r16)");
asm volatile("vextracti128 $1,%ymm3,(%r16)");
asm volatile("vinsertf128 $1,(%r16),%ymm3,%ymm8");
asm volatile("vinserti128 $1,(%r16),%ymm3,%ymm8");
asm volatile("vroundpd $1,(%r24),%xmm6");
asm volatile("vroundps $2,(%r24),%xmm6");
asm volatile("vroundsd $3,(%r24),%xmm6,%xmm3");
asm volatile("vroundss $4,(%r24),%xmm6,%xmm3");
asm volatile("wrssd %r25d,0x123(%r31,%rax,4)");
asm volatile("wrssq %r31,0x123(%r31,%rax,4)");
asm volatile("wrussd %r25d,0x123(%r31,%rax,4)");
asm volatile("wrussq %r31,0x123(%r31,%rax,4)");
/* APX new data destination */
asm volatile("adc $0x1234,%ax,%r30w");
asm volatile("adc %r15b,%r17b,%r18b");
asm volatile("adc %r15d,(%r8),%r18d");
asm volatile("adc (%r15,%rax,1),%r16b,%r8b");
asm volatile("adc (%r15,%rax,1),%r16w,%r8w");
asm volatile("adcl $0x11,(%r19,%rax,4),%r20d");
asm volatile("adcx %r15d,%r8d,%r18d");
asm volatile("adcx (%r15,%r31,1),%r8");
asm volatile("adcx (%r15,%r31,1),%r8d,%r18d");
asm volatile("add $0x1234,%ax,%r30w");
asm volatile("add $0x12344433,%r15,%r16");
asm volatile("add $0x34,%r13b,%r17b");
asm volatile("add $0xfffffffff4332211,%rax,%r8");
asm volatile("add %r31,%r8,%r16");
asm volatile("add %r31,(%r8),%r16");
asm volatile("add %r31,(%r8,%r16,8),%r16");
asm volatile("add %r31b,%r8b,%r16b");
asm volatile("add %r31d,%r8d,%r16d");
asm volatile("add %r31w,%r8w,%r16w");
asm volatile("add (%r31),%r8,%r16");
asm volatile("add 0x9090(%r31,%r16,1),%r8,%r16");
asm volatile("addb %r31b,%r8b,%r16b");
asm volatile("addl %r31d,%r8d,%r16d");
asm volatile("addl $0x11,(%r19,%rax,4),%r20d");
asm volatile("addq %r31,%r8,%r16");
asm volatile("addq $0x12344433,(%r15,%rcx,4),%r16");
asm volatile("addw %r31w,%r8w,%r16w");
asm volatile("adox %r15d,%r8d,%r18d");
asm volatile("{load} add %r31,%r8,%r16");
asm volatile("{store} add %r31,%r8,%r16");
asm volatile("adox (%r15,%r31,1),%r8");
asm volatile("adox (%r15,%r31,1),%r8d,%r18d");
asm volatile("and $0x1234,%ax,%r30w");
asm volatile("and %r15b,%r17b,%r18b");
asm volatile("and %r15d,(%r8),%r18d");
asm volatile("and (%r15,%rax,1),%r16b,%r8b");
asm volatile("and (%r15,%rax,1),%r16w,%r8w");
asm volatile("andl $0x11,(%r19,%rax,4),%r20d");
asm volatile("cmova 0x90909090(%eax),%edx,%r8d");
asm volatile("cmovae 0x90909090(%eax),%edx,%r8d");
asm volatile("cmovb 0x90909090(%eax),%edx,%r8d");
asm volatile("cmovbe 0x90909090(%eax),%edx,%r8d");
asm volatile("cmove 0x90909090(%eax),%edx,%r8d");
asm volatile("cmovg 0x90909090(%eax),%edx,%r8d");
asm volatile("cmovge 0x90909090(%eax),%edx,%r8d");
asm volatile("cmovl 0x90909090(%eax),%edx,%r8d");
asm volatile("cmovle 0x90909090(%eax),%edx,%r8d");
asm volatile("cmovne 0x90909090(%eax),%edx,%r8d");
asm volatile("cmovno 0x90909090(%eax),%edx,%r8d");
asm volatile("cmovnp 0x90909090(%eax),%edx,%r8d");
asm volatile("cmovns 0x90909090(%eax),%edx,%r8d");
asm volatile("cmovo 0x90909090(%eax),%edx,%r8d");
asm volatile("cmovp 0x90909090(%eax),%edx,%r8d");
asm volatile("cmovs 0x90909090(%eax),%edx,%r8d");
asm volatile("dec %rax,%r17");
asm volatile("decb (%r31,%r12,1),%r8b");
asm volatile("imul 0x909(%rax,%r31,8),%rdx,%r25");
asm volatile("imul 0x90909(%eax),%edx,%r8d");
asm volatile("inc %r31,%r16");
asm volatile("inc %r31,%r8");
asm volatile("inc %rax,%rbx");
asm volatile("neg %rax,%r17");
asm volatile("negb (%r31,%r12,1),%r8b");
asm volatile("not %rax,%r17");
asm volatile("notb (%r31,%r12,1),%r8b");
asm volatile("or $0x1234,%ax,%r30w");
asm volatile("or %r15b,%r17b,%r18b");
asm volatile("or %r15d,(%r8),%r18d");
asm volatile("or (%r15,%rax,1),%r16b,%r8b");
asm volatile("or (%r15,%rax,1),%r16w,%r8w");
asm volatile("orl $0x11,(%r19,%rax,4),%r20d");
asm volatile("rcl $0x2,%r12b,%r31b");
asm volatile("rcl %cl,%r16b,%r8b");
asm volatile("rclb $0x1,(%rax),%r31b");
asm volatile("rcll $0x2,(%rax),%r31d");
asm volatile("rclw $0x1,(%rax),%r31w");
asm volatile("rclw %cl,(%r19,%rax,4),%r31w");
asm volatile("rcr $0x2,%r12b,%r31b");
asm volatile("rcr %cl,%r16b,%r8b");
asm volatile("rcrb $0x1,(%rax),%r31b");
asm volatile("rcrl $0x2,(%rax),%r31d");
asm volatile("rcrw $0x1,(%rax),%r31w");
asm volatile("rcrw %cl,(%r19,%rax,4),%r31w");
asm volatile("rol $0x2,%r12b,%r31b");
asm volatile("rol %cl,%r16b,%r8b");
asm volatile("rolb $0x1,(%rax),%r31b");
asm volatile("roll $0x2,(%rax),%r31d");
asm volatile("rolw $0x1,(%rax),%r31w");
asm volatile("rolw %cl,(%r19,%rax,4),%r31w");
asm volatile("ror $0x2,%r12b,%r31b");
asm volatile("ror %cl,%r16b,%r8b");
asm volatile("rorb $0x1,(%rax),%r31b");
asm volatile("rorl $0x2,(%rax),%r31d");
asm volatile("rorw $0x1,(%rax),%r31w");
asm volatile("rorw %cl,(%r19,%rax,4),%r31w");
asm volatile("sar $0x2,%r12b,%r31b");
asm volatile("sar %cl,%r16b,%r8b");
asm volatile("sarb $0x1,(%rax),%r31b");
asm volatile("sarl $0x2,(%rax),%r31d");
asm volatile("sarw $0x1,(%rax),%r31w");
asm volatile("sarw %cl,(%r19,%rax,4),%r31w");
asm volatile("sbb $0x1234,%ax,%r30w");
asm volatile("sbb %r15b,%r17b,%r18b");
asm volatile("sbb %r15d,(%r8),%r18d");
asm volatile("sbb (%r15,%rax,1),%r16b,%r8b");
asm volatile("sbb (%r15,%rax,1),%r16w,%r8w");
asm volatile("sbbl $0x11,(%r19,%rax,4),%r20d");
asm volatile("shl $0x2,%r12b,%r31b");
asm volatile("shl $0x2,%r12b,%r31b");
asm volatile("shl %cl,%r16b,%r8b");
asm volatile("shl %cl,%r16b,%r8b");
asm volatile("shlb $0x1,(%rax),%r31b");
asm volatile("shlb $0x1,(%rax),%r31b");
asm volatile("shld $0x1,%r12,(%rax),%r31");
asm volatile("shld $0x2,%r15d,(%rax),%r31d");
asm volatile("shld $0x2,%r8w,%r12w,%r31w");
asm volatile("shld %cl,%r12,%r16,%r8");
asm volatile("shld %cl,%r13w,(%r19,%rax,4),%r31w");
asm volatile("shld %cl,%r9w,(%rax),%r31w");
asm volatile("shll $0x2,(%rax),%r31d");
asm volatile("shll $0x2,(%rax),%r31d");
asm volatile("shlw $0x1,(%rax),%r31w");
asm volatile("shlw $0x1,(%rax),%r31w");
asm volatile("shlw %cl,(%r19,%rax,4),%r31w");
asm volatile("shlw %cl,(%r19,%rax,4),%r31w");
asm volatile("shr $0x2,%r12b,%r31b");
asm volatile("shr %cl,%r16b,%r8b");
asm volatile("shrb $0x1,(%rax),%r31b");
asm volatile("shrd $0x1,%r12,(%rax),%r31");
asm volatile("shrd $0x2,%r15d,(%rax),%r31d");
asm volatile("shrd $0x2,%r8w,%r12w,%r31w");
asm volatile("shrd %cl,%r12,%r16,%r8");
asm volatile("shrd %cl,%r13w,(%r19,%rax,4),%r31w");
asm volatile("shrd %cl,%r9w,(%rax),%r31w");
asm volatile("shrl $0x2,(%rax),%r31d");
asm volatile("shrw $0x1,(%rax),%r31w");
asm volatile("shrw %cl,(%r19,%rax,4),%r31w");
asm volatile("sub $0x1234,%ax,%r30w");
asm volatile("sub %r15b,%r17b,%r18b");
asm volatile("sub %r15d,(%r8),%r18d");
asm volatile("sub (%r15,%rax,1),%r16b,%r8b");
asm volatile("sub (%r15,%rax,1),%r16w,%r8w");
asm volatile("subl $0x11,(%r19,%rax,4),%r20d");
asm volatile("xor $0x1234,%ax,%r30w");
asm volatile("xor %r15b,%r17b,%r18b");
asm volatile("xor %r15d,(%r8),%r18d");
asm volatile("xor (%r15,%rax,1),%r16b,%r8b");
asm volatile("xor (%r15,%rax,1),%r16w,%r8w");
asm volatile("xorl $0x11,(%r19,%rax,4),%r20d");
/* APX suppress status flags */
asm volatile("{nf} add %bl,%dl,%r8b");
asm volatile("{nf} add %dx,%ax,%r9w");
asm volatile("{nf} add 0x123(%r8,%rax,4),%bl,%dl");
asm volatile("{nf} add 0x123(%r8,%rax,4),%dx,%ax");
asm volatile("{nf} or %bl,%dl,%r8b");
asm volatile("{nf} or %dx,%ax,%r9w");
asm volatile("{nf} or 0x123(%r8,%rax,4),%bl,%dl");
asm volatile("{nf} or 0x123(%r8,%rax,4),%dx,%ax");
asm volatile("{nf} and %bl,%dl,%r8b");
asm volatile("{nf} and %dx,%ax,%r9w");
asm volatile("{nf} and 0x123(%r8,%rax,4),%bl,%dl");
asm volatile("{nf} and 0x123(%r8,%rax,4),%dx,%ax");
asm volatile("{nf} shld $0x7b,%dx,%ax,%r9w");
asm volatile("{nf} sub %bl,%dl,%r8b");
asm volatile("{nf} sub %dx,%ax,%r9w");
asm volatile("{nf} sub 0x123(%r8,%rax,4),%bl,%dl");
asm volatile("{nf} sub 0x123(%r8,%rax,4),%dx,%ax");
asm volatile("{nf} shrd $0x7b,%dx,%ax,%r9w");
asm volatile("{nf} xor %bl,%dl,%r8b");
asm volatile("{nf} xor %r31,%r31");
asm volatile("{nf} xor 0x123(%r8,%rax,4),%bl,%dl");
asm volatile("{nf} xor 0x123(%r8,%rax,4),%dx,%ax");
asm volatile("{nf} imul $0xff90,%r9,%r15");
asm volatile("{nf} imul $0x7b,%r9,%r15");
asm volatile("{nf} xor $0x7b,%bl,%dl");
asm volatile("{nf} xor $0x7b,%dx,%ax");
asm volatile("{nf} popcnt %r9,%r31");
asm volatile("{nf} shld %cl,%dx,%ax,%r9w");
asm volatile("{nf} shrd %cl,%dx,%ax,%r9w");
asm volatile("{nf} imul %r9,%r31,%r11");
asm volatile("{nf} sar $0x7b,%bl,%dl");
asm volatile("{nf} sar $0x7b,%dx,%ax");
asm volatile("{nf} sar $1,%bl,%dl");
asm volatile("{nf} sar $1,%dx,%ax");
asm volatile("{nf} sar %cl,%bl,%dl");
asm volatile("{nf} sar %cl,%dx,%ax");
asm volatile("{nf} andn %r9,%r31,%r11");
asm volatile("{nf} blsi %r9,%r31");
asm volatile("{nf} tzcnt %r9,%r31");
asm volatile("{nf} lzcnt %r9,%r31");
asm volatile("{nf} idiv %bl");
asm volatile("{nf} idiv %dx");
asm volatile("{nf} dec %bl,%dl");
asm volatile("{nf} dec %dx,%ax");
#else /* #ifdef __x86_64__ */
/* bound r32, mem (same op code as EVEX prefix) */
@@ -4848,6 +5354,97 @@ int main(void)
#endif /* #ifndef __x86_64__ */
/* Key Locker */
asm volatile(" loadiwkey %xmm1, %xmm2");
asm volatile(" encodekey128 %eax, %edx");
asm volatile(" encodekey256 %eax, %edx");
asm volatile(" aesenc128kl 0x77(%edx), %xmm3");
asm volatile(" aesenc256kl 0x77(%edx), %xmm3");
asm volatile(" aesdec128kl 0x77(%edx), %xmm3");
asm volatile(" aesdec256kl 0x77(%edx), %xmm3");
asm volatile(" aesencwide128kl 0x77(%edx)");
asm volatile(" aesencwide256kl 0x77(%edx)");
asm volatile(" aesdecwide128kl 0x77(%edx)");
asm volatile(" aesdecwide256kl 0x77(%edx)");
/* Remote Atomic Operations */
asm volatile("aadd %ecx,(%eax)");
asm volatile("aadd %edx,(0x12345678)");
asm volatile("aadd %edx,0x12345678(%eax,%ecx,8)");
asm volatile("aand %ecx,(%eax)");
asm volatile("aand %edx,(0x12345678)");
asm volatile("aand %edx,0x12345678(%eax,%ecx,8)");
asm volatile("aor %ecx,(%eax)");
asm volatile("aor %edx,(0x12345678)");
asm volatile("aor %edx,0x12345678(%eax,%ecx,8)");
asm volatile("axor %ecx,(%eax)");
asm volatile("axor %edx,(0x12345678)");
asm volatile("axor %edx,0x12345678(%eax,%ecx,8)");
/* AVX NE Convert */
asm volatile("vbcstnebf162ps (%ecx),%xmm6");
asm volatile("vbcstnesh2ps (%ecx),%xmm6");
asm volatile("vcvtneebf162ps (%ecx),%xmm6");
asm volatile("vcvtneeph2ps (%ecx),%xmm6");
asm volatile("vcvtneobf162ps (%ecx),%xmm6");
asm volatile("vcvtneoph2ps (%ecx),%xmm6");
asm volatile("vcvtneps2bf16 %xmm1,%xmm6");
/* AVX VNNI INT8 */
asm volatile("vpdpbssd %xmm1,%xmm2,%xmm3");
asm volatile("vpdpbssds %xmm1,%xmm2,%xmm3");
asm volatile("vpdpbsud %xmm1,%xmm2,%xmm3");
asm volatile("vpdpbsuds %xmm1,%xmm2,%xmm3");
asm volatile("vpdpbuud %xmm1,%xmm2,%xmm3");
asm volatile("vpdpbuuds %xmm1,%xmm2,%xmm3");
asm volatile("vpdpwsud %xmm1,%xmm2,%xmm3");
asm volatile("vpdpwsuds %xmm1,%xmm2,%xmm3");
asm volatile("vpdpwusd %xmm1,%xmm2,%xmm3");
asm volatile("vpdpwusds %xmm1,%xmm2,%xmm3");
asm volatile("vpdpwuud %xmm1,%xmm2,%xmm3");
asm volatile("vpdpwuuds %xmm1,%xmm2,%xmm3");
/* AVX IFMA */
asm volatile("vpmadd52huq %xmm1,%xmm2,%xmm3");
asm volatile("vpmadd52luq %xmm1,%xmm2,%xmm3");
/* AVX SHA512 */
asm volatile("vsha512msg1 %xmm1,%ymm2");
asm volatile("vsha512msg2 %ymm1,%ymm2");
asm volatile("vsha512rnds2 %xmm1,%ymm2,%ymm3");
/* AVX SM3 */
asm volatile("vsm3msg1 %xmm1,%xmm2,%xmm3");
asm volatile("vsm3msg2 %xmm1,%xmm2,%xmm3");
asm volatile("vsm3rnds2 $0xa1,%xmm1,%xmm2,%xmm3");
/* AVX SM4 */
asm volatile("vsm4key4 %xmm1,%xmm2,%xmm3");
asm volatile("vsm4rnds4 %xmm1,%xmm2,%xmm3");
/* Pre-fetch */
asm volatile("prefetch (%eax)");
asm volatile("prefetcht0 (%eax)");
asm volatile("prefetcht1 (%eax)");
asm volatile("prefetcht2 (%eax)");
asm volatile("prefetchnta (%eax)");
/* Non-serializing write MSR */
asm volatile("wrmsrns");
/* Prediction history reset */
asm volatile("hreset $0");