enc_base.go 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171
  1. package zstd
  2. import (
  3. "fmt"
  4. "math/bits"
  5. "github.com/klauspost/compress/zstd/internal/xxhash"
  6. )
  // dictShardBits is not referenced by any code visible in this part of the file.
  // NOTE(review): the name suggests the log2 of a dictionary shard size or
  // count (1<<7 == 128) — confirm against the code that uses it.
  const dictShardBits = 7
  10. type fastBase struct {
  11. // cur is the offset at the start of hist
  12. cur int32
  13. // maximum offset. Should be at least 2x block size.
  14. maxMatchOff int32
  15. bufferReset int32
  16. hist []byte
  17. crc *xxhash.Digest
  18. tmp [8]byte
  19. blk *blockEnc
  20. lastDictID uint32
  21. lowMem bool
  22. }
  23. // CRC returns the underlying CRC writer.
  24. func (e *fastBase) CRC() *xxhash.Digest {
  25. return e.crc
  26. }
  27. // AppendCRC will append the CRC to the destination slice and return it.
  28. func (e *fastBase) AppendCRC(dst []byte) []byte {
  29. crc := e.crc.Sum(e.tmp[:0])
  30. dst = append(dst, crc[7], crc[6], crc[5], crc[4])
  31. return dst
  32. }
  33. // WindowSize returns the window size of the encoder,
  34. // or a window size small enough to contain the input size, if > 0.
  35. func (e *fastBase) WindowSize(size int64) int32 {
  36. if size > 0 && size < int64(e.maxMatchOff) {
  37. b := max(
  38. // Keep minimum window.
  39. int32(1)<<uint(bits.Len(uint(size))), 1024)
  40. return b
  41. }
  42. return e.maxMatchOff
  43. }
  44. // Block returns the current block.
  45. func (e *fastBase) Block() *blockEnc {
  46. return e.blk
  47. }
  48. func (e *fastBase) addBlock(src []byte) int32 {
  49. if debugAsserts && e.cur > e.bufferReset {
  50. panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, e.bufferReset))
  51. }
  52. // check if we have space already
  53. if len(e.hist)+len(src) > cap(e.hist) {
  54. if cap(e.hist) == 0 {
  55. e.ensureHist(len(src))
  56. } else {
  57. if cap(e.hist) < int(e.maxMatchOff+maxCompressedBlockSize) {
  58. panic(fmt.Errorf("unexpected buffer cap %d, want at least %d with window %d", cap(e.hist), e.maxMatchOff+maxCompressedBlockSize, e.maxMatchOff))
  59. }
  60. // Move down
  61. offset := int32(len(e.hist)) - e.maxMatchOff
  62. copy(e.hist[0:e.maxMatchOff], e.hist[offset:])
  63. e.cur += offset
  64. e.hist = e.hist[:e.maxMatchOff]
  65. }
  66. }
  67. s := int32(len(e.hist))
  68. e.hist = append(e.hist, src...)
  69. return s
  70. }
  71. // ensureHist will ensure that history can keep at least this many bytes.
  72. func (e *fastBase) ensureHist(n int) {
  73. if cap(e.hist) >= n {
  74. return
  75. }
  76. l := e.maxMatchOff
  77. if (e.lowMem && e.maxMatchOff > maxCompressedBlockSize) || e.maxMatchOff <= maxCompressedBlockSize {
  78. l += maxCompressedBlockSize
  79. } else {
  80. l += e.maxMatchOff
  81. }
  82. // Make it at least 1MB.
  83. if l < 1<<20 && !e.lowMem {
  84. l = 1 << 20
  85. }
  86. // Make it at least the requested size.
  87. if l < int32(n) {
  88. l = int32(n)
  89. }
  90. e.hist = make([]byte, 0, l)
  91. }
  92. // useBlock will replace the block with the provided one,
  93. // but transfer recent offsets from the previous.
  94. func (e *fastBase) UseBlock(enc *blockEnc) {
  95. enc.reset(e.blk)
  96. e.blk = enc
  97. }
  98. func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
  99. if debugAsserts {
  100. if s < 0 {
  101. err := fmt.Sprintf("s (%d) < 0", s)
  102. panic(err)
  103. }
  104. if t < 0 {
  105. err := fmt.Sprintf("t (%d) < 0", t)
  106. panic(err)
  107. }
  108. if s-t > e.maxMatchOff {
  109. err := fmt.Sprintf("s (%d) - t (%d) > maxMatchOff (%d)", s, t, e.maxMatchOff)
  110. panic(err)
  111. }
  112. if len(src)-int(s) > maxCompressedBlockSize {
  113. panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize))
  114. }
  115. }
  116. return int32(matchLen(src[s:], src[t:]))
  117. }
  118. // Reset the encoding table.
  119. func (e *fastBase) resetBase(d *dict, singleBlock bool) {
  120. if e.blk == nil {
  121. e.blk = &blockEnc{lowMem: e.lowMem}
  122. e.blk.init()
  123. } else {
  124. e.blk.reset(nil)
  125. }
  126. e.blk.initNewEncode()
  127. if e.crc == nil {
  128. e.crc = xxhash.New()
  129. } else {
  130. e.crc.Reset()
  131. }
  132. e.blk.dictLitEnc = nil
  133. if d != nil {
  134. low := e.lowMem
  135. if singleBlock {
  136. e.lowMem = true
  137. }
  138. e.ensureHist(d.ContentSize() + maxCompressedBlockSize)
  139. e.lowMem = low
  140. }
  141. // We offset current position so everything will be out of reach.
  142. // If above reset line, history will be purged.
  143. if e.cur < e.bufferReset {
  144. e.cur += e.maxMatchOff + int32(len(e.hist))
  145. }
  146. e.hist = e.hist[:0]
  147. if d != nil {
  148. // Set offsets (currently not used)
  149. for i, off := range d.offsets {
  150. e.blk.recentOffsets[i] = uint32(off)
  151. e.blk.prevRecentOffsets[i] = e.blk.recentOffsets[i]
  152. }
  153. // Transfer litenc.
  154. e.blk.dictLitEnc = d.litEnc
  155. e.hist = append(e.hist, d.content...)
  156. }
  157. }