Skip to content

Commit

Permalink
s2: Don't use stack for index tables (#1014)
Browse files Browse the repository at this point in the history
* s2: Don't use stack for index tables

Provide a pooled array pointer for tables instead of using the stack.

Go still seems to be unstable with large stacks, so use an alternative method.
  • Loading branch information
klauspost authored Oct 4, 2024
1 parent f73ab1e commit dbd6c38
Show file tree
Hide file tree
Showing 8 changed files with 11,284 additions and 11,011 deletions.
53 changes: 38 additions & 15 deletions s2/_generate/gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ func main() {
o.maxSkip = 100
o.genEncodeBetterBlockAsm("encodeSnappyBetterBlockAsm", 17, 14, 7, 7, limit14B)
o.maxSkip = 0
o.genEncodeBetterBlockAsm("encodeSnappyBetterBlockAsm64K", 16, 14, 7, 7, 64<<10-1)
o.genEncodeBetterBlockAsm("encodeSnappyBetterBlockAsm64K", 16, 13, 7, 7, 64<<10-1)
o.genEncodeBetterBlockAsm("encodeSnappyBetterBlockAsm12B", 14, 12, 6, 6, limit12B)
o.genEncodeBetterBlockAsm("encodeSnappyBetterBlockAsm10B", 12, 10, 5, 6, limit10B)
o.genEncodeBetterBlockAsm("encodeSnappyBetterBlockAsm8B", 10, 8, 4, 6, limit8B)
Expand Down Expand Up @@ -146,6 +146,15 @@ func assert(fn func(ok LabelRef)) {
}
}

// regTable describes a hash table whose storage lives in memory reached
// through a base register, rather than in stack-allocated locals
// (AllocLocal). disp lets two tables share one backing allocation by
// placing the second at a fixed byte offset from the same base.
type regTable struct {
	r    reg.Register // base pointer of the table's backing array
	disp int          // constant byte displacement added to every access
}

// Idx returns a memory operand for element idx of the table: the table's
// base register plus idx scaled by scale bytes, offset by the table's
// constant displacement.
func (r regTable) Idx(idx reg.GPVirtual, scale uint8) Mem {
	operand := Mem{
		Base:  r.r,
		Disp:  r.disp,
		Index: idx,
		Scale: scale,
	}
	return operand
}

type options struct {
snappy bool
bmi1 bool
Expand All @@ -163,7 +172,15 @@ func (o options) genEncodeBlockAsm(name string, tableBits, skipLog, hashBytes, m
if o.skipOutput {
dstTxt = ""
}
TEXT(name, 0, "func("+dstTxt+"src []byte) int")

var tableSize = 4 * (1 << tableBits)
// Memzero needs at least 128 bytes.
if tableSize < 128 {
panic("tableSize must be at least 128 bytes")
}

arrPtr := fmt.Sprintf(",tmp *[%d]byte", tableSize)
TEXT(name, 0, "func("+dstTxt+"src []byte"+arrPtr+") int")
Doc(name+" encodes a non-empty src to a guaranteed-large-enough dst.",
fmt.Sprintf("Maximum input %d bytes.", maxLen),
"It assumes that the varint-encoded length of the decompressed bytes has already been written.", "")
Expand All @@ -173,7 +190,6 @@ func (o options) genEncodeBlockAsm(name string, tableBits, skipLog, hashBytes, m
o.maxOffset = maxLen - 1
var literalMaxOverhead = maxLitOverheadFor(maxLen)

var tableSize = 4 * (1 << tableBits)
// Memzero needs at least 128 bytes.
if tableSize < 128 {
panic("tableSize must be at least 128 bytes")
Expand Down Expand Up @@ -209,8 +225,8 @@ func (o options) genEncodeBlockAsm(name string, tableBits, skipLog, hashBytes, m
// nextSTempL keeps nextS while other functions are being called.
nextSTempL := AllocLocal(4)

// Alloc table last
table := AllocLocal(tableSize)
// Load pointer to temp table
table := regTable{r: Load(Param("tmp"), GP64())}

dst := GP64()
if !o.skipOutput {
Expand All @@ -236,7 +252,7 @@ func (o options) genEncodeBlockAsm(name string, tableBits, skipLog, hashBytes, m
iReg := GP64()
MOVQ(U32(tableSize/8/16), iReg)
tablePtr := GP64()
LEAQ(table, tablePtr)
MOVQ(table.r, tablePtr)
zeroXmm := XMM()
PXOR(zeroXmm, zeroXmm)

Expand Down Expand Up @@ -855,7 +871,17 @@ func maxLitOverheadFor(n int) int {
}

func (o options) genEncodeBetterBlockAsm(name string, lTableBits, sTableBits, skipLog, lHashBytes, maxLen int) {
TEXT(name, 0, "func(dst, src []byte) int")
var lTableSize = 4 * (1 << lTableBits)
var sTableSize = 4 * (1 << sTableBits)
tableSize := lTableSize + sTableSize

// Memzero needs at least 128 bytes.
if tableSize < 128 {
panic("tableSize must be at least 128 bytes")
}
arrPtr := fmt.Sprintf(", tmp *[%d]byte", tableSize)

TEXT(name, 0, "func(dst, src []byte"+arrPtr+") int")
Doc(name+" encodes a non-empty src to a guaranteed-large-enough dst.",
fmt.Sprintf("Maximum input %d bytes.", maxLen),
"It assumes that the varint-encoded length of the decompressed bytes has already been written.", "")
Expand All @@ -870,9 +896,6 @@ func (o options) genEncodeBetterBlockAsm(name string, lTableBits, sTableBits, sk
o.maxLen = maxLen
o.maxOffset = maxLen - 1

var lTableSize = 4 * (1 << lTableBits)
var sTableSize = 4 * (1 << sTableBits)

// Memzero needs at least 128 bytes.
if (lTableSize + sTableSize) < 128 {
panic("tableSize must be at least 128 bytes")
Expand Down Expand Up @@ -905,9 +928,9 @@ func (o options) genEncodeBetterBlockAsm(name string, lTableBits, sTableBits, sk
// nextSTempL keeps nextS while other functions are being called.
nextSTempL := AllocLocal(4)

// Alloc table last, lTab must be before sTab.
lTab := AllocLocal(lTableSize)
sTab := AllocLocal(sTableSize)
table := Load(Param("tmp"), GP64())
lTab := regTable{r: table}
sTab := regTable{r: table, disp: lTableSize}

dst := GP64()
{
Expand All @@ -930,7 +953,7 @@ func (o options) genEncodeBetterBlockAsm(name string, lTableBits, sTableBits, sk
iReg := GP64()
MOVQ(U32((sTableSize+lTableSize)/8/16), iReg)
tablePtr := GP64()
LEAQ(lTab, tablePtr)
MOVQ(table, tablePtr)
zeroXmm := XMM()
PXOR(zeroXmm, zeroXmm)

Expand Down Expand Up @@ -2916,7 +2939,7 @@ func (o options) cvtLZ4BlockAsm(lz4s bool) {
TEXT("cvt"+srcAlgo+"Block"+snap, NOSPLIT, "func(dst, src []byte) (uncompressed int, dstUsed int)")
Doc("cvt"+srcAlgo+"Block converts an "+srcAlgo+" block to "+dstAlgo, "")
Pragma("noescape")
o.outputMargin = 10
o.outputMargin = 8
o.maxOffset = math.MaxUint16

const (
Expand Down
25 changes: 23 additions & 2 deletions s2/encode.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ import (
"encoding/binary"
"math"
"math/bits"
"sync"

"github.com/klauspost/compress/internal/race"
)

// Encode returns the encoded form of src. The returned slice may be a sub-
Expand Down Expand Up @@ -52,6 +55,8 @@ func Encode(dst, src []byte) []byte {
return dst[:d]
}

var estblockPool [2]sync.Pool

// EstimateBlockSize will perform a very fast compression
// without outputting the result and return the compressed output size.
// The function returns -1 if no improvement could be achieved.
Expand All @@ -61,9 +66,25 @@ func EstimateBlockSize(src []byte) (d int) {
return -1
}
if len(src) <= 1024 {
d = calcBlockSizeSmall(src)
const sz, pool = 2048, 0
tmp, ok := estblockPool[pool].Get().(*[sz]byte)
if !ok {
tmp = &[sz]byte{}
}
race.WriteSlice(tmp[:])
defer estblockPool[pool].Put(tmp)

d = calcBlockSizeSmall(src, tmp)
} else {
d = calcBlockSize(src)
const sz, pool = 32768, 1
tmp, ok := estblockPool[pool].Get().(*[sz]byte)
if !ok {
tmp = &[sz]byte{}
}
race.WriteSlice(tmp[:])
defer estblockPool[pool].Put(tmp)

d = calcBlockSize(src, tmp)
}

if d == 0 {
Expand Down
Loading

0 comments on commit dbd6c38

Please sign in to comment.