Skip to content

Commit

Permalink
zstd: BuildDict fails with RLE table (#951)
Browse files Browse the repository at this point in the history
* zstd: BuildDict fails with RLE table

We cannot build a usable table when the histogram would be RLE. Add a fake entry so that valid tables are generated.

* Prevent offsets longer than the dict from being selected.
  • Loading branch information
klauspost authored Jun 12, 2024
1 parent 8411e1d commit 0396178
Showing 1 changed file with 31 additions and 0 deletions.
31 changes: 31 additions & 0 deletions zstd/dict.go
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,9 @@ func BuildDict(o BuildDictOptions) ([]byte, error) {
enc.Encode(&block, b)
addValues(&remain, block.literals)
litTotal += len(block.literals)
if len(block.sequences) == 0 {
continue
}
seqs += len(block.sequences)
block.genCodes()
addHist(&ll, block.coders.llEnc.Histogram())
Expand All @@ -286,6 +289,9 @@ func BuildDict(o BuildDictOptions) ([]byte, error) {
if offset == 0 {
continue
}
if int(offset) >= len(o.History) {
continue
}
if offset > 3 {
newOffsets[offset-3]++
} else {
Expand Down Expand Up @@ -336,6 +342,9 @@ func BuildDict(o BuildDictOptions) ([]byte, error) {
if seqs/nUsed < 512 {
// Use 512 as minimum.
nUsed = seqs / 512
if nUsed == 0 {
nUsed = 1
}
}
copyHist := func(dst *fseEncoder, src *[256]int) ([]byte, error) {
hist := dst.Histogram()
Expand All @@ -358,6 +367,28 @@ func BuildDict(o BuildDictOptions) ([]byte, error) {
fakeLength += v
hist[i] = uint32(v)
}

// Ensure we aren't trying to represent RLE.
if maxCount == fakeLength {
for i := range hist {
if uint8(i) == maxSym {
fakeLength++
maxSym++
hist[i+1] = 1
if maxSym > 1 {
break
}
}
if hist[0] == 0 {
fakeLength++
hist[i] = 1
if maxSym > 1 {
break
}
}
}
}

dst.HistogramFinished(maxSym, maxCount)
dst.reUsed = false
dst.useRLE = false
Expand Down

0 comments on commit 0396178

Please sign in to comment.