Skip to content

Commit

Permalink
Merge branch 'dev'
Browse files Browse the repository at this point in the history
  • Loading branch information
herumi committed Oct 15, 2024
2 parents d067f0d + 2d70c94 commit 3ee31be
Show file tree
Hide file tree
Showing 18 changed files with 624 additions and 131 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.5)

project(xbyak LANGUAGES CXX VERSION 7.10)
project(xbyak LANGUAGES CXX VERSION 7.20)

file(GLOB headers xbyak/*.h)

Expand Down
2 changes: 2 additions & 0 deletions doc/changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# History

* 2024/Oct/15 ver 7.20 Fixed the specification of setDefaultEncoding, setDefaultEncodingAVX10.
* 2024/Oct/15 ver 7.11 Added full support for AVX10.2
* 2024/Oct/13 ver 7.10 support AVX10 integer and fp16 vnni, media new instructions. setDefaultEncoding is extended.
* 2024/Oct/10 ver 7.09.1 fix the names of vpcompressb and vpcompressw
* 2024/Oct/08 ver 7.09 support YMM embedded rounding of AVX10.2 and fix some mnemonics with {sae}/{er}.
Expand Down
101 changes: 57 additions & 44 deletions doc/usage.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Usage

Inherit from the `Xbyak::CodeGenerator` class and define a class method.
```
```cpp
#include <xbyak/xbyak.h>

struct Code : Xbyak::CodeGenerator {
Expand All @@ -13,7 +13,7 @@ struct Code : Xbyak::CodeGenerator {
};
```
Or you can pass the instance of CodeGenerator without inheriting.
```
```cpp
void genCode(Xbyak::CodeGenerator& code, int x) {
using namespace Xbyak::util;
code.mov(eax, x);
Expand All @@ -23,7 +23,7 @@ void genCode(Xbyak::CodeGenerator& code, int x) {

Make an instance of the class and get the function
pointer by calling `getCode()` and call it.
```
```cpp
Code c(5);
int (*f)() = c.getCode<int (*)()>();
printf("ret=%d\n", f()); // ret = 5
Expand All @@ -32,7 +32,7 @@ printf("ret=%d\n", f()); // ret = 5
## Syntax
Similar to MASM/NASM syntax with parentheses.
```
```cpp
NASM Xbyak
mov eax, ebx --> mov(eax, ebx);
inc ecx inc(ecx);
Expand All @@ -43,7 +43,7 @@ ret --> ret();
Use `qword`, `dword`, `word` and `byte` if it is necessary to specify the size of memory,
otherwise use `ptr`.

```
```cpp
(ptr|qword|dword|word|byte) [base + index * (1|2|4|8) + displacement]
[rip + 32bit disp] ; x64 only

Expand All @@ -53,19 +53,21 @@ mov al, [ebx+ecx] --> mov(al, ptr [ebx + ecx]);
test byte [esp], 4 --> test(byte [esp], 4);
inc qword [rax] --> inc(qword [rax]);
```
**Note**: `qword`, ... are member variables, so don't use `dword` as an unsigned int type.
### How to use Selector (Segment Register)
```
```cpp
mov eax, [fs:eax] --> putSeg(fs);
mov(eax, ptr [eax]);
mov ax, cs --> mov(ax, cs);
```

**Note**: Segment class is not derived from `Operand`.

## AVX

```
```cpp
vaddps(xmm1, xmm2, xmm3); // xmm1 <- xmm2 + xmm3
vaddps(xmm2, xmm3, ptr [rax]); // use ptr to access memory
vgatherdpd(xmm1, ptr [ebp + 256 + xmm2*4], xmm3);
Expand All @@ -74,13 +76,13 @@ vgatherdpd(xmm1, ptr [ebp + 256 + xmm2*4], xmm3);
**Note**:
If `XBYAK_ENABLE_OMITTED_OPERAND` is defined, then you can use two operand version for backward compatibility.
But the newer version will not support it.
```
```cpp
vaddps(xmm2, xmm3); // xmm2 <- xmm2 + xmm3
```

## AVX-512

```
```cpp
vaddpd zmm2, zmm5, zmm30 --> vaddpd(zmm2, zmm5, zmm30);
vaddpd xmm30, xmm20, [rax] --> vaddpd(xmm30, xmm20, ptr [rax]);
vaddps xmm30, xmm20, [rax] --> vaddps(xmm30, xmm20, ptr [rax]);
Expand Down Expand Up @@ -108,35 +110,44 @@ vfpclasspd k5{k3}, [rax+64]{1to2}, 5 --> vfpclasspd(k5|k3, xword_b [rax+64],
vfpclassps k5{k3}, [rax+64]{1to4}, 5 --> vfpclassps(k5|k3, yword_b [rax+64], 5); // broadcast 64-bit to 256-bit
```
## Selecting AVX512-VNNI, AVX-VNNI, AVX-VNNI-INT8 etc.
Some mnemonics have two types of encodings: VEX and EVEX.
## Selecting AVX512-VNNI, AVX-VNNI, AVX-VNNI-INT8, AVX10.2.
Some mnemonics have several types of encodings: VEX, EVEX, AVX10.2.
The functions for these mnemonics include an optional parameter as the last argument to specify the encoding.
The default behavior depends on the order in which the instruction was introduced (whether VEX or EVEX came first),
The default behavior depends on the order in which the instruction was introduced (whether VEX, EVEX or AVX10.2 came first),
and can be specified using setDefaultEncoding.
```
```cpp
vpdpbusd(xm0, xm1, xm2); // default encoding: EVEX (AVX512-VNNI)
vpdpbusd(xm0, xm1, xm2, EvexEncoding); // same as the above
vpdpbusd(xm0, xm1, xm2, VexEncoding); // VEX (AVX-VNNI)
setDefaultEncoding(VexEncoding); // default encoding is VEX
setDefaultEncoding(VexEncoding); // change default encoding
vpdpbusd(xm0, xm1, xm2); // VEX
vmpsadbw(xm1, xm3, xm15, 3); // default encoding: VEX (AVX-VNNI)
vmpsadbw(xm1, xm3, xm15, 3, VexEncoding); // same as the above
vmpsadbw(xm1, xm3, xm15, 3, EvexEncoding); // EVEX (AVX10.2)
setDefaultEncoding(VexEncoding, EvexEncoding); // use 2nd argument.
vmpsadbw(xm1, xm3, xm15, 3); // EVEX
vmpsadbw(xm1, xm3, xm15, 3); // default encoding: AVX
vmpsadbw(xm1, xm3, xm15, 3, PreAVX10v2Encoding); // same as the above
vmpsadbw(xm1, xm3, xm15, 3, AVX10v2Encoding); // AVX10.2
setDefaultEncodingAVX10(AVX10v2Encoding); // change default encoding
vmpsadbw(xm1, xm3, xm15, 3); // AVX10.2
```

- `setDefaultEncoding(PreferredEncoding vnniEnc = EvexEncoding, PreferredEncoding avx10Enc = VexEncoding)`
Control the default encoding of mnemonics with `Xbyak::PreferredEncoding` param.
- `setDefaultEncoding(PreferredEncoding enc = EvexEncoding)`
- Configure encoding for AVX512-VNNI or AVX-VNNI instructions.
- `setDefaultEncodingAVX10(PreferredEncoding enc = PreAVX10v2Encoding)`
- Configure encoding for pre-AVX10.2 and AVX10.2 instructions.

`setDefaultEncoding`|EvexEncoding (default)|VexEncoding
-|-|-
feature|AVX512-VNNI|AVX-VNNI

- Target functions: vpdpbusd, vpdpbusds, vpdpwssd, vpdpwssds

param|vnniEnc|avx10Enc
`setDefaultEncodingAVX10`|PreAVX10v2Encoding (default)|AVX10v2Encoding
-|-|-
EvexEncoding|AVX512-VNNI|AVX10.2
VexEncoding|AVX-VNNI|AVX-VNNI-INT8
default|EvexEncoding|VexEncoding
mnemonic|vpdpbusd, vpdpbusds, vpdpwssd, vpdpwssds|vmpsadbw, vpdpbssd, vpdpbssds, vpdpbsud, vpdpbsuds, vpdpbuud, vpdpbuuds, vpdpwsud vpdpwsuds vpdpwusd vpdpwusds vpdpwuud, vpdpwuuds
feature|AVX-VNNI-INT8, AVX512-FP16|AVX10.2

- Target functions: vmpsadbw, vpdpbssd, vpdpbssds, vpdpbsud, vpdpbsuds, vpdpbuud, vpdpbuuds, vpdpwsud, vpdpwsuds, vpdpwusd, vpdpwusds, vpdpwuud, vpdpwuuds, vmovd, vmovw

- Remark: vmovd and vmovw have several kinds of encoding such as AVX/AVX512F/AVX512-FP16/AVX10.2.

### Remark
* `k1`, ..., `k7` are opmask registers.
Expand Down Expand Up @@ -179,7 +190,7 @@ mnemonic|vpdpbusd, vpdpbusds, vpdpwssd, vpdpwssds|vmpsadbw, vpdpbssd, vpdpbssds,
Two kinds of Label are supported. (String literal and Label class).

### String literal
```
```cpp
L("L1");
jmp("L1");

Expand All @@ -201,7 +212,7 @@ L("L3");
### Support `@@`, `@f`, `@b` like MASM
```
```cpp
L("@@"); // <A>
jmp("@b"); // jmp to <A>
jmp("@f"); // jmp to <B>
Expand All @@ -217,7 +228,7 @@ Label symbols beginning with a period between `inLocalLabel()` and `outLocalLabe
are treated as a local label.
`inLocalLabel()` and `outLocalLabel()` can be nested.

```
```cpp
void func1()
{
inLocalLabel();
Expand All @@ -240,7 +251,7 @@ void func1()
Xbyak deals with jump mnemonics of an undefined label as short jump if no type is specified.
So if the size between jmp and label is larger than 127 bytes, then xbyak will cause an error.

```
```cpp
jmp("short-jmp"); // short jmp
// small code
L("short-jmp");
Expand All @@ -249,14 +260,16 @@ jmp("long-jmp");
// long code
L("long-jmp"); // throw exception
```
Then specify T_NEAR for jmp.
```
```cpp
jmp("long-jmp", T_NEAR); // long jmp
// long code
L("long-jmp");
```

Or call `setDefaultJmpNEAR(true);` once, then the default type is set to T_NEAR.
```
```cpp
jmp("long-jmp"); // long jmp
// long code
L("long-jmp");
Expand All @@ -266,7 +279,7 @@ L("long-jmp");
`L()` and `jxx()` support Label class.
```
```cpp
Xbyak::Label label1, label2;
L(label1);
...
Expand All @@ -278,7 +291,7 @@ L(label2);
```

Use `putL` for jmp table
```
```cpp
Label labelTbl, L0, L1, L2;
mov(rax, labelTbl);
// rdx is an index of jump table
Expand All @@ -295,7 +308,7 @@ L(L1);
`assignL(dstLabel, srcLabel)` binds dstLabel with srcLabel.
```
```cpp
Label label2;
Label label1 = L(); // make label1 ; same to Label label1; L(label1);
...
Expand All @@ -310,7 +323,7 @@ The `jmp` in the above code jumps to label1 assigned by `assignL`.
* dstLabel must not be used in `L()`.

`Label::getAddress()` returns the address specified by the label instance and 0 if not specified.
```
```cpp
// not AutoGrow mode
Label label;
assert(label.getAddress() == 0);
Expand All @@ -319,15 +332,15 @@ assert(label.getAddress() == getCurr());
```
### Rip ; relative addressing
```
```cpp
Label label;
mov(eax, ptr [rip + label]); // eax = 4
...
L(label);
dd(4);
```
```
```cpp
int x;
...
mov(eax, ptr[rip + &x]); // throw exception if the difference between &x and current position is larger than 2GiB
Expand All @@ -338,13 +351,13 @@ int x;
Use `word|dword|qword` instead of `ptr` to specify the address size.
### 32 bit mode
```
```cpp
jmp(word[eax], T_FAR); // jmp m16:16(FF /5)
jmp(dword[eax], T_FAR); // jmp m16:32(FF /5)
```

### 64 bit mode
```
```cpp
jmp(word[rax], T_FAR); // jmp m16:16(FF /5)
jmp(dword[rax], T_FAR); // jmp m16:32(FF /5)
jmp(qword[rax], T_FAR); // jmp m16:64(REX.W FF /5)
Expand All @@ -355,7 +368,7 @@ The same applies to `call`.
The default max code size is 4096 bytes.
Specify the size in constructor of `CodeGenerator()` if necessary.
```
```cpp
class Quantize : public Xbyak::CodeGenerator {
public:
Quantize()
Expand All @@ -372,7 +385,7 @@ You can make jit code on prepared memory.

Call `setProtectModeRE` yourself to change memory mode if using the prepared memory.

```
```cpp
uint8_t alignas(4096) buf[8192]; // C++11 or later

struct Code : Xbyak::CodeGenerator {
Expand All @@ -398,7 +411,7 @@ int main()
The memory region for jit is automatically extended if necessary when `AutoGrow` is specified in a constructor of `CodeGenerator`.
Call `ready()` or `readyRE()` before calling `getCode()` to fix jump address.
```
```cpp
struct Code : Xbyak::CodeGenerator {
Code()
: Xbyak::CodeGenerator(<default memory size>, Xbyak::AutoGrow)
Expand All @@ -419,7 +432,7 @@ Xbyak set Read/Write/Exec mode to memory to run jit code.
If you want to use Read/Exec mode for security, then specify `DontSetProtectRWE` for `CodeGenerator` and
call `setProtectModeRE()` after generating jit code.

```
```cpp
struct Code : Xbyak::CodeGenerator {
Code()
: Xbyak::CodeGenerator(4096, Xbyak::DontSetProtectRWE)
Expand Down
Loading

0 comments on commit 3ee31be

Please sign in to comment.