Skip to content

Commit

Permalink
AArch64: use CAS instead of LDXR/STXR if available
Browse files Browse the repository at this point in the history
This covers 128-bit loads, and atomicrmw operations without a single native
instruction. Using CAS has a better chance of succeeding under high
contention on some systems.
  • Loading branch information
TNorthover committed Dec 14, 2022
1 parent 247d8d4 commit 10d34f5
Show file tree
Hide file tree
Showing 6 changed files with 641 additions and 207 deletions.
11 changes: 8 additions & 3 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21903,7 +21903,10 @@ AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
return AtomicExpansionKind::CmpXChg;

return AtomicExpansionKind::LLSC;
// Using CAS for an atomic load has a better chance of succeeding under high
// contention situations. So use it if available.
return Subtarget->hasLSE() ? AtomicExpansionKind::CmpXChg
: AtomicExpansionKind::LLSC;
}

// For the real atomic operations, we have ldxr/stxr up to 128 bits,
Expand Down Expand Up @@ -21940,8 +21943,10 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
// implement atomicrmw without spilling. If the target address is also on the
// stack and close enough to the spill slot, this can lead to a situation
// where the monitor always gets cleared and the atomic operation can never
// succeed. So at -O0 lower this operation to a CAS loop.
if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
// succeed. So at -O0 lower this operation to a CAS loop. Also worthwhile if
// we have a single CAS instruction that can replace the loop.
if (getTargetMachine().getOptLevel() == CodeGenOpt::None ||
Subtarget->hasLSE())
return AtomicExpansionKind::CmpXChg;

return AtomicExpansionKind::LLSC;
Expand Down
13 changes: 5 additions & 8 deletions llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic-128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -360,14 +360,11 @@ define void @atomic_load_relaxed(i64, i64, ptr %p, ptr %p2) {
;
; CHECK-CAS-O1-LABEL: atomic_load_relaxed:
; CHECK-CAS-O1: // %bb.0:
; CHECK-CAS-O1-NEXT: .LBB4_1: // %atomicrmw.start
; CHECK-CAS-O1-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-CAS-O1-NEXT: ldxp x9, x8, [x2]
; CHECK-CAS-O1-NEXT: stxp w10, x9, x8, [x2]
; CHECK-CAS-O1-NEXT: cbnz w10, .LBB4_1
; CHECK-CAS-O1-NEXT: // %bb.2: // %atomicrmw.end
; CHECK-CAS-O1-NEXT: mov v0.d[0], x9
; CHECK-CAS-O1-NEXT: mov v0.d[1], x8
; CHECK-CAS-O1-NEXT: mov x0, xzr
; CHECK-CAS-O1-NEXT: mov x1, xzr
; CHECK-CAS-O1-NEXT: casp x0, x1, x0, x1, [x2]
; CHECK-CAS-O1-NEXT: mov v0.d[0], x0
; CHECK-CAS-O1-NEXT: mov v0.d[1], x1
; CHECK-CAS-O1-NEXT: str q0, [x3]
; CHECK-CAS-O1-NEXT: ret
;
Expand Down
28 changes: 16 additions & 12 deletions llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
Original file line number Diff line number Diff line change
Expand Up @@ -362,15 +362,17 @@ define i32 @fetch_and_nand(ptr %p) #0 {
;
; CHECK-LSE-O1-LABEL: fetch_and_nand:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: mov x8, x0
; CHECK-LSE-O1-NEXT: ldr w0, [x0]
; CHECK-LSE-O1-NEXT: LBB6_1: ; %atomicrmw.start
; CHECK-LSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-LSE-O1-NEXT: ldxr w8, [x0]
; CHECK-LSE-O1-NEXT: and w9, w8, #0x7
; CHECK-LSE-O1-NEXT: mvn w9, w9
; CHECK-LSE-O1-NEXT: stlxr w10, w9, [x0]
; CHECK-LSE-O1-NEXT: cbnz w10, LBB6_1
; CHECK-LSE-O1-NEXT: mov x9, x0
; CHECK-LSE-O1-NEXT: and w10, w0, #0x7
; CHECK-LSE-O1-NEXT: mvn w10, w10
; CHECK-LSE-O1-NEXT: casl w0, w10, [x8]
; CHECK-LSE-O1-NEXT: cmp w0, w9
; CHECK-LSE-O1-NEXT: b.ne LBB6_1
; CHECK-LSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
; CHECK-LSE-O1-NEXT: mov x0, x8
; CHECK-LSE-O1-NEXT: ret
;
; CHECK-LSE-O0-LABEL: fetch_and_nand:
Expand Down Expand Up @@ -455,15 +457,17 @@ define i64 @fetch_and_nand_64(ptr %p) #0 {
;
; CHECK-LSE-O1-LABEL: fetch_and_nand_64:
; CHECK-LSE-O1: ; %bb.0:
; CHECK-LSE-O1-NEXT: mov x8, x0
; CHECK-LSE-O1-NEXT: ldr x0, [x0]
; CHECK-LSE-O1-NEXT: LBB7_1: ; %atomicrmw.start
; CHECK-LSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-LSE-O1-NEXT: ldaxr x8, [x0]
; CHECK-LSE-O1-NEXT: and x9, x8, #0x7
; CHECK-LSE-O1-NEXT: mvn x9, x9
; CHECK-LSE-O1-NEXT: stlxr w10, x9, [x0]
; CHECK-LSE-O1-NEXT: cbnz w10, LBB7_1
; CHECK-LSE-O1-NEXT: mov x9, x0
; CHECK-LSE-O1-NEXT: and x10, x0, #0x7
; CHECK-LSE-O1-NEXT: mvn x10, x10
; CHECK-LSE-O1-NEXT: casal x0, x10, [x8]
; CHECK-LSE-O1-NEXT: cmp x0, x9
; CHECK-LSE-O1-NEXT: b.ne LBB7_1
; CHECK-LSE-O1-NEXT: ; %bb.2: ; %atomicrmw.end
; CHECK-LSE-O1-NEXT: mov x0, x8
; CHECK-LSE-O1-NEXT: ret
;
; CHECK-LSE-O0-LABEL: fetch_and_nand_64:
Expand Down
Loading

0 comments on commit 10d34f5

Please sign in to comment.