// level1.go — flate level-1 (fastest) encoder.
  1. package flate
  2. import (
  3. "fmt"
  4. "github.com/klauspost/compress/internal/le"
  5. )
// fastEncL1 maintains the table for matches,
// and the previous byte block for level 2.
// This is the generic implementation.
type fastEncL1 struct {
	fastGen                         // embedded history buffer (hist) and running offset (cur) — declared elsewhere in the package
	table [tableSize]tableEntry     // hash table mapping hashed input bytes to absolute offsets (offset includes e.cur)
}
// Encode appends tokens for src to dst using a greedy, hash-table based
// match finder (a Snappy-style scheme adapted to DEFLATE token output).
// It updates e.hist/e.cur via addBlock and mutates e.table.
func (e *fastEncL1) Encode(dst *tokens, src []byte) {
	const (
		// inputMargin bytes are kept off the end of the search range so the
		// unchecked 8-byte loads (load6432) below stay in bounds.
		inputMargin            = 12 - 1
		minNonLiteralBlockSize = 1 + 1 + inputMargin
		// hashBytes is how many of the loaded bytes participate in the hash.
		hashBytes = 5
	)
	if debugDeflate && e.cur < 0 {
		panic(fmt.Sprint("e.cur < 0: ", e.cur))
	}

	// Protect against e.cur wraparound.
	for e.cur >= bufferReset {
		if len(e.hist) == 0 {
			// No history to preserve: just clear the table and reset.
			for i := range e.table[:] {
				e.table[i] = tableEntry{}
			}
			e.cur = maxMatchOffset
			break
		}
		// Shift down everything in the table that isn't already too far away.
		minOff := e.cur + int32(len(e.hist)) - maxMatchOffset
		for i := range e.table[:] {
			v := e.table[i].offset
			if v <= minOff {
				v = 0 // entry is out of match range; drop it
			} else {
				v = v - e.cur + maxMatchOffset // rebase onto the new, smaller e.cur
			}
			e.table[i].offset = v
		}
		e.cur = maxMatchOffset
	}

	// s is the current position in e.hist (after src has been appended).
	s := e.addBlock(src)

	// This check isn't in the Snappy implementation, but there, the caller
	// instead of the callee handles this case.
	if len(src) < minNonLiteralBlockSize {
		// We do not fill the token table.
		// This will be picked up by caller.
		dst.n = uint16(len(src))
		return
	}

	// Override src
	src = e.hist
	nextEmit := s

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := int32(len(src) - inputMargin)

	// nextEmit is where in src the next emitLiteral should start from.
	cv := load6432(src, s)

	for {
		const skipLog = 5 // skip rate grows as (s-nextEmit)>>skipLog in incompressible data
		const doEvery = 2 // base step between probe positions

		nextS := s
		var candidate tableEntry
		var t int32
		// Inner scan loop: probe two positions per iteration until a
		// 4-byte match is found or we run past sLimit.
		for {
			nextHash := hashLen(cv, tableBits, hashBytes)
			candidate = e.table[nextHash]
			nextS = s + doEvery + (s-nextEmit)>>skipLog
			if nextS > sLimit {
				goto emitRemainder
			}

			now := load6432(src, nextS)
			e.table[nextHash] = tableEntry{offset: s + e.cur}
			nextHash = hashLen(now, tableBits, hashBytes)

			t = candidate.offset - e.cur // candidate position relative to current history
			if s-t < maxMatchOffset && uint32(cv) == load3232(src, t) {
				// Found a 4-byte match; also index nextS before breaking out.
				e.table[nextHash] = tableEntry{offset: nextS + e.cur}
				break
			}

			// Do one right away...
			cv = now
			s = nextS
			nextS++
			candidate = e.table[nextHash]
			now >>= 8
			e.table[nextHash] = tableEntry{offset: s + e.cur}

			t = candidate.offset - e.cur
			if s-t < maxMatchOffset && uint32(cv) == load3232(src, t) {
				e.table[nextHash] = tableEntry{offset: nextS + e.cur}
				break
			}
			cv = now
			s = nextS
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.

			// Extend the 4-byte match as long as possible.
			l := e.matchlenLong(int(s+4), int(t+4), src) + 4

			// Extend backwards
			for t > 0 && s > nextEmit && le.Load8(src, t-1) == le.Load8(src, s-1) {
				s--
				t--
				l++
			}
			if nextEmit < s {
				if false {
					// Reference (non-inlined) form, kept for documentation.
					emitLiteral(dst, src[nextEmit:s])
				} else {
					// Inlined literal emission: one token + histogram update per byte.
					for _, v := range src[nextEmit:s] {
						dst.tokens[dst.n] = token(v)
						dst.litHist[v]++
						dst.n++
					}
				}
			}

			// Save the match found
			if false {
				// Reference (non-inlined) form, kept for documentation.
				dst.AddMatchLong(l, uint32(s-t-baseMatchOffset))
			} else {
				// Inlined...
				xoffset := uint32(s - t - baseMatchOffset)
				xlength := l
				oc := offsetCode(xoffset)
				xoffset |= oc << 16 // pack the offset code into the high bits
				// Long matches are split into chunks of at most 258 bytes
				// (DEFLATE's maximum match length).
				for xlength > 0 {
					xl := xlength
					if xl > 258 {
						if xl > 258+baseMatchLength {
							xl = 258
						} else {
							// Avoid leaving a remainder shorter than the
							// minimum match length.
							xl = 258 - baseMatchLength
						}
					}
					xlength -= xl
					xl -= baseMatchLength // store length biased by the minimum match length
					dst.extraHist[lengthCodes1[uint8(xl)]]++
					dst.offHist[oc]++
					dst.tokens[dst.n] = token(matchType | uint32(xl)<<lengthShift | xoffset)
					dst.n++
				}
			}
			s += l
			nextEmit = s
			if nextS >= s {
				s = nextS + 1
			}
			if s >= sLimit {
				// Index first pair after match end.
				// NOTE(review): s has already been advanced past the match, so
				// s+l+8 double-counts l; this is stricter (more conservative)
				// than needed but safe — confirm intent before changing.
				if int(s+l+8) < len(src) {
					cv := load6432(src, s)
					e.table[hashLen(cv, tableBits, hashBytes)] = tableEntry{offset: s + e.cur}
				}
				goto emitRemainder
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-2 and at s. If
			// another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these three hash calculations
			// are faster as one load64 call (with some shifts) instead of
			// three load32 calls.
			x := load6432(src, s-2)
			o := e.cur + s - 2
			prevHash := hashLen(x, tableBits, hashBytes)
			e.table[prevHash] = tableEntry{offset: o}
			x >>= 16 // x now holds the bytes starting at s
			currHash := hashLen(x, tableBits, hashBytes)
			candidate = e.table[currHash]
			e.table[currHash] = tableEntry{offset: o + 2}

			t = candidate.offset - e.cur
			if s-t > maxMatchOffset || uint32(x) != load3232(src, t) {
				// No immediate match at s: advance one byte and go back to scanning.
				cv = x >> 8
				s++
				break
			}
			// Otherwise loop: another match starts right at s.
		}
	}

emitRemainder:
	if int(nextEmit) < len(src) {
		// If nothing was added, don't encode literals.
		if dst.n == 0 {
			return
		}
		emitLiteral(dst, src[nextEmit:])
	}
}