encoder.go 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177
  1. package brotli
  2. import "github.com/andybalholm/brotli/matchfinder"
  3. // An Encoder implements the matchfinder.Encoder interface, writing in Brotli format.
  4. type Encoder struct {
  5. wroteHeader bool
  6. bw bitWriter
  7. distCache []distanceCode
  8. }
  9. func (e *Encoder) Reset() {
  10. e.wroteHeader = false
  11. e.bw = bitWriter{}
  12. }
  13. func (e *Encoder) Encode(dst []byte, src []byte, matches []matchfinder.Match, lastBlock bool) []byte {
  14. e.bw.dst = dst
  15. if !e.wroteHeader {
  16. e.bw.writeBits(4, 15)
  17. e.wroteHeader = true
  18. }
  19. if len(src) == 0 {
  20. if lastBlock {
  21. e.bw.writeBits(2, 3) // islast + isempty
  22. e.bw.jumpToByteBoundary()
  23. return e.bw.dst
  24. }
  25. return dst
  26. }
  27. var literalHisto [256]uint32
  28. var commandHisto [704]uint32
  29. var distanceHisto [64]uint32
  30. literalCount := 0
  31. commandCount := 0
  32. distanceCount := 0
  33. if len(e.distCache) < len(matches) {
  34. e.distCache = make([]distanceCode, len(matches))
  35. }
  36. // first pass: build the histograms
  37. pos := 0
  38. // d is the ring buffer of the last 4 distances.
  39. d := [4]int{-10, -10, -10, -10}
  40. for i, m := range matches {
  41. if m.Unmatched > 0 {
  42. for _, c := range src[pos : pos+m.Unmatched] {
  43. literalHisto[c]++
  44. }
  45. literalCount += m.Unmatched
  46. }
  47. insertCode := getInsertLengthCode(uint(m.Unmatched))
  48. copyCode := getCopyLengthCode(uint(m.Length))
  49. if m.Length == 0 {
  50. // If the stream ends with unmatched bytes, we need a dummy copy length.
  51. copyCode = 2
  52. }
  53. command := combineLengthCodes(insertCode, copyCode, false)
  54. commandHisto[command]++
  55. commandCount++
  56. if command >= 128 && m.Length != 0 {
  57. var distCode distanceCode
  58. switch m.Distance {
  59. case d[3]:
  60. distCode.code = 0
  61. case d[2]:
  62. distCode.code = 1
  63. case d[1]:
  64. distCode.code = 2
  65. case d[0]:
  66. distCode.code = 3
  67. case d[3] - 1:
  68. distCode.code = 4
  69. case d[3] + 1:
  70. distCode.code = 5
  71. case d[3] - 2:
  72. distCode.code = 6
  73. case d[3] + 2:
  74. distCode.code = 7
  75. case d[3] - 3:
  76. distCode.code = 8
  77. case d[3] + 3:
  78. distCode.code = 9
  79. // In my testing, codes 10–15 actually reduced the compression ratio.
  80. default:
  81. distCode = getDistanceCode(m.Distance)
  82. }
  83. e.distCache[i] = distCode
  84. distanceHisto[distCode.code]++
  85. distanceCount++
  86. if distCode.code != 0 {
  87. d[0], d[1], d[2], d[3] = d[1], d[2], d[3], m.Distance
  88. }
  89. }
  90. pos += m.Unmatched + m.Length
  91. }
  92. storeMetaBlockHeaderBW(uint(len(src)), false, &e.bw)
  93. e.bw.writeBits(13, 0)
  94. var literalDepths [256]byte
  95. var literalBits [256]uint16
  96. buildAndStoreHuffmanTreeFastBW(literalHisto[:], uint(literalCount), 8, literalDepths[:], literalBits[:], &e.bw)
  97. var commandDepths [704]byte
  98. var commandBits [704]uint16
  99. buildAndStoreHuffmanTreeFastBW(commandHisto[:], uint(commandCount), 10, commandDepths[:], commandBits[:], &e.bw)
  100. var distanceDepths [64]byte
  101. var distanceBits [64]uint16
  102. buildAndStoreHuffmanTreeFastBW(distanceHisto[:], uint(distanceCount), 6, distanceDepths[:], distanceBits[:], &e.bw)
  103. pos = 0
  104. for i, m := range matches {
  105. insertCode := getInsertLengthCode(uint(m.Unmatched))
  106. copyCode := getCopyLengthCode(uint(m.Length))
  107. if m.Length == 0 {
  108. // If the stream ends with unmatched bytes, we need a dummy copy length.
  109. copyCode = 2
  110. }
  111. command := combineLengthCodes(insertCode, copyCode, false)
  112. e.bw.writeBits(uint(commandDepths[command]), uint64(commandBits[command]))
  113. if kInsExtra[insertCode] > 0 {
  114. e.bw.writeBits(uint(kInsExtra[insertCode]), uint64(m.Unmatched)-uint64(kInsBase[insertCode]))
  115. }
  116. if kCopyExtra[copyCode] > 0 {
  117. e.bw.writeBits(uint(kCopyExtra[copyCode]), uint64(m.Length)-uint64(kCopyBase[copyCode]))
  118. }
  119. if m.Unmatched > 0 {
  120. for _, c := range src[pos : pos+m.Unmatched] {
  121. e.bw.writeBits(uint(literalDepths[c]), uint64(literalBits[c]))
  122. }
  123. }
  124. if command >= 128 && m.Length != 0 {
  125. distCode := e.distCache[i]
  126. e.bw.writeBits(uint(distanceDepths[distCode.code]), uint64(distanceBits[distCode.code]))
  127. if distCode.nExtra > 0 {
  128. e.bw.writeBits(distCode.nExtra, distCode.extraBits)
  129. }
  130. }
  131. pos += m.Unmatched + m.Length
  132. }
  133. if lastBlock {
  134. e.bw.writeBits(2, 3) // islast + isempty
  135. e.bw.jumpToByteBoundary()
  136. }
  137. return e.bw.dst
  138. }
  139. type distanceCode struct {
  140. code int
  141. nExtra uint
  142. extraBits uint64
  143. }
  144. func getDistanceCode(distance int) distanceCode {
  145. d := distance + 3
  146. nbits := log2FloorNonZero(uint(d)) - 1
  147. prefix := (d >> nbits) & 1
  148. offset := (2 + prefix) << nbits
  149. distcode := int(2*(nbits-1)) + prefix + 16
  150. extra := d - offset
  151. return distanceCode{distcode, uint(nbits), uint64(extra)}
  152. }