  1. package flate
  2. import (
  3. "io"
  4. "math"
  5. "sync"
  6. "github.com/klauspost/compress/internal/le"
  7. )
const (
	// maxStatelessBlock is the maximum number of input bytes handled per
	// block; positions are tracked as int16, so this must fit in one.
	maxStatelessBlock = math.MaxInt16
	// dictionary will be taken from maxStatelessBlock, so limit it.
	maxStatelessDict = 8 << 10

	// Hash table sizing for statelessEnc: slTableBits-bit indexes are
	// produced by shifting a 32-bit hash down by slTableShift.
	slTableBits  = 13
	slTableSize  = 1 << slTableBits
	slTableShift = 32 - slTableBits
)
// statelessWriter forwards each Write call to StatelessDeflate as an
// independent compression, so no state is retained between calls.
type statelessWriter struct {
	dst    io.Writer // destination for compressed output
	closed bool      // set once Close has emitted the EOF block
}
  20. func (s *statelessWriter) Close() error {
  21. if s.closed {
  22. return nil
  23. }
  24. s.closed = true
  25. // Emit EOF block
  26. return StatelessDeflate(s.dst, nil, true, nil)
  27. }
  28. func (s *statelessWriter) Write(p []byte) (n int, err error) {
  29. err = StatelessDeflate(s.dst, p, false, nil)
  30. if err != nil {
  31. return 0, err
  32. }
  33. return len(p), nil
  34. }
  35. func (s *statelessWriter) Reset(w io.Writer) {
  36. s.dst = w
  37. s.closed = false
  38. }
  39. // NewStatelessWriter will do compression but without maintaining any state
  40. // between Write calls.
  41. // There will be no memory kept between Write calls,
  42. // but compression and speed will be suboptimal.
  43. // Because of this, the size of actual Write calls will affect output size.
  44. func NewStatelessWriter(dst io.Writer) io.WriteCloser {
  45. return &statelessWriter{dst: dst}
  46. }
// bitWriterPool contains bit writers that can be reused, so StatelessDeflate
// does not allocate a fresh huffmanBitWriter on every call.
var bitWriterPool = sync.Pool{
	New: func() interface{} {
		return newHuffmanBitWriter(nil)
	},
}
// StatelessDeflate allows compressing directly to a Writer without retaining state.
// When returning everything will be flushed.
// Up to 8KB of an optional dictionary can be given which is presumed to precede the block.
// Longer dictionaries will be truncated and will still produce valid output.
// Sending nil dictionary is perfectly fine.
func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error {
	var dst tokens
	// Borrow a bit writer from the pool rather than allocating one.
	bw := bitWriterPool.Get().(*huffmanBitWriter)
	bw.reset(out)
	defer func() {
		// don't keep a reference to our output
		bw.reset(nil)
		bitWriterPool.Put(bw)
	}()
	if eof && len(in) == 0 {
		// Just write an EOF block.
		// Could be faster...
		bw.writeStoredHeader(0, true)
		bw.flush()
		return bw.err
	}

	// Truncate dict to the supported maximum; only the trailing bytes matter
	// since the dictionary is presumed to immediately precede the input.
	if len(dict) > maxStatelessDict {
		dict = dict[len(dict)-maxStatelessDict:]
	}

	// For subsequent loops, keep shallow dict reference to avoid alloc+copy:
	// after the first block, the dictionary is simply the tail of the
	// previous block's input, already contiguous with the next input.
	var inDict []byte

	for len(in) > 0 {
		// Cap the block so dictionary+input positions still fit in int16
		// (statelessEnc tracks offsets as int16).
		todo := in
		if len(inDict) > 0 {
			if len(todo) > maxStatelessBlock-maxStatelessDict {
				todo = todo[:maxStatelessBlock-maxStatelessDict]
			}
		} else if len(todo) > maxStatelessBlock-len(dict) {
			todo = todo[:maxStatelessBlock-len(dict)]
		}
		inOrg := in
		in = in[len(todo):]
		uncompressed := todo
		if len(dict) > 0 {
			// combine dict and source into a single contiguous buffer,
			// since the encoder addresses both through one slice.
			bufLen := len(todo) + len(dict)
			combined := make([]byte, bufLen)
			copy(combined, dict)
			copy(combined[len(dict):], todo)
			todo = combined
		}
		// Compress. startAt marks where the dictionary ends and real
		// output-producing input begins.
		if len(inDict) == 0 {
			statelessEnc(&dst, todo, int16(len(dict)))
		} else {
			statelessEnc(&dst, inDict[:maxStatelessDict+len(todo)], maxStatelessDict)
		}
		isEof := eof && len(in) == 0

		if dst.n == 0 {
			// Encoder produced nothing (input too small) — store verbatim.
			bw.writeStoredHeader(len(uncompressed), isEof)
			if bw.err != nil {
				return bw.err
			}
			bw.writeBytes(uncompressed)
		} else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 {
			// If we removed less than 1/16th, huffman compress the block.
			bw.writeBlockHuff(isEof, uncompressed, len(in) == 0)
		} else {
			bw.writeBlockDynamic(&dst, isEof, uncompressed, len(in) == 0)
		}
		if len(in) > 0 {
			// Retain a dict if we have more: keep the last maxStatelessDict
			// bytes of this block's input as the next block's dictionary.
			inDict = inOrg[len(uncompressed)-maxStatelessDict:]
			dict = nil
			dst.Reset()
		}
		if bw.err != nil {
			return bw.err
		}
	}
	if !eof {
		// Align, only a stored block can do that.
		bw.writeStoredHeader(0, false)
	}
	bw.flush()
	return bw.err
}
  136. func hashSL(u uint32) uint32 {
  137. return (u * 0x1e35a7bd) >> slTableShift
  138. }
// load3216 loads 4 bytes from b at offset i via the le (little-endian)
// helper package. Offsets are int16 because stateless blocks are capped
// at maxStatelessBlock bytes.
func load3216(b []byte, i int16) uint32 {
	return le.Load32(b, i)
}
// load6416 loads 8 bytes from b at offset i via the le (little-endian)
// helper package, using the same int16 offsets as load3216.
func load6416(b []byte, i int16) uint64 {
	return le.Load64(b, i)
}
// statelessEnc tokenizes src[startAt:] into dst using a hash table that
// lives only on this call's stack — hence "stateless". src[:startAt] acts
// as a dictionary: it is indexed so matches may point into it, but it
// produces no output itself. If the input past startAt is too small to be
// worth encoding, dst.n is left at 0 so the caller stores the block raw.
func statelessEnc(dst *tokens, src []byte, startAt int16) {
	const (
		inputMargin            = 12 - 1
		minNonLiteralBlockSize = 1 + 1 + inputMargin
	)

	type tableEntry struct {
		offset int16
	}

	// Per-call hash table; discarded on return.
	var table [slTableSize]tableEntry

	// This check isn't in the Snappy implementation, but there, the caller
	// instead of the callee handles this case.
	if len(src)-int(startAt) < minNonLiteralBlockSize {
		// We do not fill the token table.
		// This will be picked up by caller.
		dst.n = 0
		return
	}
	// Index until startAt: populate the table with dictionary positions,
	// sliding a 4-byte window one byte at a time.
	if startAt > 0 {
		cv := load3232(src, 0)
		for i := int16(0); i < startAt; i++ {
			table[hashSL(cv)] = tableEntry{offset: i}
			cv = (cv >> 8) | (uint32(src[i+4]) << 24)
		}
	}

	s := startAt + 1
	nextEmit := startAt
	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := int16(len(src) - inputMargin)

	// nextEmit is where in src the next emitLiteral should start from.
	cv := load3216(src, s)

	for {
		// skipLog controls how aggressively we accelerate through
		// incompressible data; doEvery hashes two positions per iteration.
		const skipLog = 5
		const doEvery = 2

		nextS := s
		var candidate tableEntry
		// Scan forward until a 4-byte match with a table candidate is found.
		for {
			nextHash := hashSL(cv)
			candidate = table[nextHash]
			nextS = s + doEvery + (s-nextEmit)>>skipLog
			// nextS <= 0 guards against int16 wraparound of the skip step.
			if nextS > sLimit || nextS <= 0 {
				goto emitRemainder
			}

			now := load6416(src, nextS)
			table[nextHash] = tableEntry{offset: s}
			nextHash = hashSL(uint32(now))

			if cv == load3216(src, candidate.offset) {
				table[nextHash] = tableEntry{offset: nextS}
				break
			}

			// Do one right away...
			cv = uint32(now)
			s = nextS
			nextS++
			candidate = table[nextHash]
			now >>= 8
			table[nextHash] = tableEntry{offset: s}

			if cv == load3216(src, candidate.offset) {
				table[nextHash] = tableEntry{offset: nextS}
				break
			}
			cv = uint32(now)
			s = nextS
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.

			// Extend the 4-byte match as long as possible.
			t := candidate.offset
			l := int16(matchLen(src[s+4:], src[t+4:]) + 4)

			// Extend backwards over bytes not yet emitted as literals.
			for t > 0 && s > nextEmit && src[t-1] == src[s-1] {
				s--
				t--
				l++
			}
			if nextEmit < s {
				if false {
					emitLiteral(dst, src[nextEmit:s])
				} else {
					// Disabled emitLiteral above kept for reference; the
					// inline loop below does the same literal emission.
					for _, v := range src[nextEmit:s] {
						dst.tokens[dst.n] = token(v)
						dst.litHist[v]++
						dst.n++
					}
				}
			}

			// Save the match found
			dst.AddMatchLong(int32(l), uint32(s-t-baseMatchOffset))
			s += l
			nextEmit = s
			if nextS >= s {
				s = nextS + 1
			}
			if s >= sLimit {
				goto emitRemainder
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-2 and at s. If
			// another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these three hash calculations
			// are faster as one load64 call (with some shifts) instead of
			// three load32 calls.
			x := load6416(src, s-2)
			o := s - 2
			prevHash := hashSL(uint32(x))
			table[prevHash] = tableEntry{offset: o}
			x >>= 16
			currHash := hashSL(uint32(x))
			candidate = table[currHash]
			table[currHash] = tableEntry{offset: o + 2}

			// No immediate follow-up match: fall back to the scanning loop.
			if uint32(x) != load3216(src, candidate.offset) {
				cv = uint32(x >> 8)
				s++
				break
			}
		}
	}

emitRemainder:
	if int(nextEmit) < len(src) {
		// If nothing was added, don't encode literals.
		if dst.n == 0 {
			return
		}
		emitLiteral(dst, src[nextEmit:])
	}
  276. }