encode_amd64.go 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317
  1. //go:build !appengine && !noasm && gc
  2. // +build !appengine,!noasm,gc
  3. package s2
  4. import (
  5. "sync"
  6. "github.com/klauspost/compress/internal/race"
  7. )
// hasAmd64Asm is true in this file, which is only built with the gc toolchain
// and without the appengine or noasm build tags.
// NOTE(review): presumably read elsewhere to detect that the assembly encoders
// are available; no use is visible in this file, so confirm against callers.
const hasAmd64Asm = true

// encPools holds reusable scratch arrays for encodeBlock and
// encodeBlockSnappy. Each slot stores pointers to a single array size:
// 0 -> *[65536]byte, 1 -> *[16384]byte, 2 -> *[4096]byte, 3 -> *[1024]byte.
var encPools [4]sync.Pool
  10. // encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
  11. // assumes that the varint-encoded length of the decompressed bytes has already
  12. // been written.
  13. //
  14. // It also assumes that:
  15. //
  16. // len(dst) >= MaxEncodedLen(len(src)) &&
  17. // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
  18. func encodeBlock(dst, src []byte) (d int) {
  19. race.ReadSlice(src)
  20. race.WriteSlice(dst)
  21. const (
  22. // Use 12 bit table when less than...
  23. limit12B = 16 << 10
  24. // Use 10 bit table when less than...
  25. limit10B = 4 << 10
  26. // Use 8 bit table when less than...
  27. limit8B = 512
  28. )
  29. if len(src) >= 4<<20 {
  30. const sz, pool = 65536, 0
  31. tmp, ok := encPools[pool].Get().(*[sz]byte)
  32. if !ok {
  33. tmp = &[sz]byte{}
  34. }
  35. race.WriteSlice(tmp[:])
  36. defer encPools[pool].Put(tmp)
  37. return encodeBlockAsm(dst, src, tmp)
  38. }
  39. if len(src) >= limit12B {
  40. const sz, pool = 65536, 0
  41. tmp, ok := encPools[pool].Get().(*[sz]byte)
  42. if !ok {
  43. tmp = &[sz]byte{}
  44. }
  45. race.WriteSlice(tmp[:])
  46. defer encPools[pool].Put(tmp)
  47. return encodeBlockAsm4MB(dst, src, tmp)
  48. }
  49. if len(src) >= limit10B {
  50. const sz, pool = 16384, 1
  51. tmp, ok := encPools[pool].Get().(*[sz]byte)
  52. if !ok {
  53. tmp = &[sz]byte{}
  54. }
  55. race.WriteSlice(tmp[:])
  56. defer encPools[pool].Put(tmp)
  57. return encodeBlockAsm12B(dst, src, tmp)
  58. }
  59. if len(src) >= limit8B {
  60. const sz, pool = 4096, 2
  61. tmp, ok := encPools[pool].Get().(*[sz]byte)
  62. if !ok {
  63. tmp = &[sz]byte{}
  64. }
  65. race.WriteSlice(tmp[:])
  66. defer encPools[pool].Put(tmp)
  67. return encodeBlockAsm10B(dst, src, tmp)
  68. }
  69. if len(src) < minNonLiteralBlockSize {
  70. return 0
  71. }
  72. const sz, pool = 1024, 3
  73. tmp, ok := encPools[pool].Get().(*[sz]byte)
  74. if !ok {
  75. tmp = &[sz]byte{}
  76. }
  77. race.WriteSlice(tmp[:])
  78. defer encPools[pool].Put(tmp)
  79. return encodeBlockAsm8B(dst, src, tmp)
  80. }
// encBetterPools holds reusable scratch arrays for encodeBlockBetter and
// encodeBlockBetterSnappy. Each call site picks its slot index and array size
// as constants and type-asserts the value it gets back, allocating a fresh
// array when the assertion fails.
var encBetterPools [5]sync.Pool
  82. // encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It
  83. // assumes that the varint-encoded length of the decompressed bytes has already
  84. // been written.
  85. //
  86. // It also assumes that:
  87. //
  88. // len(dst) >= MaxEncodedLen(len(src)) &&
  89. // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
  90. func encodeBlockBetter(dst, src []byte) (d int) {
  91. race.ReadSlice(src)
  92. race.WriteSlice(dst)
  93. const (
  94. // Use 12 bit table when less than...
  95. limit12B = 16 << 10
  96. // Use 10 bit table when less than...
  97. limit10B = 4 << 10
  98. // Use 8 bit table when less than...
  99. limit8B = 512
  100. )
  101. if len(src) > 4<<20 {
  102. const sz, pool = 589824, 0
  103. tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
  104. if !ok {
  105. tmp = &[sz]byte{}
  106. }
  107. race.WriteSlice(tmp[:])
  108. defer encBetterPools[pool].Put(tmp)
  109. return encodeBetterBlockAsm(dst, src, tmp)
  110. }
  111. if len(src) >= limit12B {
  112. const sz, pool = 589824, 0
  113. tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
  114. if !ok {
  115. tmp = &[sz]byte{}
  116. }
  117. race.WriteSlice(tmp[:])
  118. defer encBetterPools[pool].Put(tmp)
  119. return encodeBetterBlockAsm4MB(dst, src, tmp)
  120. }
  121. if len(src) >= limit10B {
  122. const sz, pool = 81920, 0
  123. tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
  124. if !ok {
  125. tmp = &[sz]byte{}
  126. }
  127. race.WriteSlice(tmp[:])
  128. defer encBetterPools[pool].Put(tmp)
  129. return encodeBetterBlockAsm12B(dst, src, tmp)
  130. }
  131. if len(src) >= limit8B {
  132. const sz, pool = 20480, 1
  133. tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
  134. if !ok {
  135. tmp = &[sz]byte{}
  136. }
  137. race.WriteSlice(tmp[:])
  138. defer encBetterPools[pool].Put(tmp)
  139. return encodeBetterBlockAsm10B(dst, src, tmp)
  140. }
  141. if len(src) < minNonLiteralBlockSize {
  142. return 0
  143. }
  144. const sz, pool = 5120, 2
  145. tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
  146. if !ok {
  147. tmp = &[sz]byte{}
  148. }
  149. race.WriteSlice(tmp[:])
  150. defer encBetterPools[pool].Put(tmp)
  151. return encodeBetterBlockAsm8B(dst, src, tmp)
  152. }
  153. // encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
  154. // assumes that the varint-encoded length of the decompressed bytes has already
  155. // been written.
  156. //
  157. // It also assumes that:
  158. //
  159. // len(dst) >= MaxEncodedLen(len(src)) &&
  160. // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
  161. func encodeBlockSnappy(dst, src []byte) (d int) {
  162. race.ReadSlice(src)
  163. race.WriteSlice(dst)
  164. const (
  165. // Use 12 bit table when less than...
  166. limit12B = 16 << 10
  167. // Use 10 bit table when less than...
  168. limit10B = 4 << 10
  169. // Use 8 bit table when less than...
  170. limit8B = 512
  171. )
  172. if len(src) > 65536 {
  173. const sz, pool = 65536, 0
  174. tmp, ok := encPools[pool].Get().(*[sz]byte)
  175. if !ok {
  176. tmp = &[sz]byte{}
  177. }
  178. race.WriteSlice(tmp[:])
  179. defer encPools[pool].Put(tmp)
  180. return encodeSnappyBlockAsm(dst, src, tmp)
  181. }
  182. if len(src) >= limit12B {
  183. const sz, pool = 65536, 0
  184. tmp, ok := encPools[pool].Get().(*[sz]byte)
  185. if !ok {
  186. tmp = &[sz]byte{}
  187. }
  188. race.WriteSlice(tmp[:])
  189. defer encPools[pool].Put(tmp)
  190. return encodeSnappyBlockAsm64K(dst, src, tmp)
  191. }
  192. if len(src) >= limit10B {
  193. const sz, pool = 16384, 1
  194. tmp, ok := encPools[pool].Get().(*[sz]byte)
  195. if !ok {
  196. tmp = &[sz]byte{}
  197. }
  198. race.WriteSlice(tmp[:])
  199. defer encPools[pool].Put(tmp)
  200. return encodeSnappyBlockAsm12B(dst, src, tmp)
  201. }
  202. if len(src) >= limit8B {
  203. const sz, pool = 4096, 2
  204. tmp, ok := encPools[pool].Get().(*[sz]byte)
  205. if !ok {
  206. tmp = &[sz]byte{}
  207. }
  208. race.WriteSlice(tmp[:])
  209. defer encPools[pool].Put(tmp)
  210. return encodeSnappyBlockAsm10B(dst, src, tmp)
  211. }
  212. if len(src) < minNonLiteralBlockSize {
  213. return 0
  214. }
  215. const sz, pool = 1024, 3
  216. tmp, ok := encPools[pool].Get().(*[sz]byte)
  217. if !ok {
  218. tmp = &[sz]byte{}
  219. }
  220. race.WriteSlice(tmp[:])
  221. defer encPools[pool].Put(tmp)
  222. return encodeSnappyBlockAsm8B(dst, src, tmp)
  223. }
  224. // encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
  225. // assumes that the varint-encoded length of the decompressed bytes has already
  226. // been written.
  227. //
  228. // It also assumes that:
  229. //
  230. // len(dst) >= MaxEncodedLen(len(src)) &&
  231. // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
  232. func encodeBlockBetterSnappy(dst, src []byte) (d int) {
  233. race.ReadSlice(src)
  234. race.WriteSlice(dst)
  235. const (
  236. // Use 12 bit table when less than...
  237. limit12B = 16 << 10
  238. // Use 10 bit table when less than...
  239. limit10B = 4 << 10
  240. // Use 8 bit table when less than...
  241. limit8B = 512
  242. )
  243. if len(src) > 65536 {
  244. const sz, pool = 589824, 0
  245. tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
  246. if !ok {
  247. tmp = &[sz]byte{}
  248. }
  249. race.WriteSlice(tmp[:])
  250. defer encBetterPools[pool].Put(tmp)
  251. return encodeSnappyBetterBlockAsm(dst, src, tmp)
  252. }
  253. if len(src) >= limit12B {
  254. const sz, pool = 294912, 4
  255. tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
  256. if !ok {
  257. tmp = &[sz]byte{}
  258. }
  259. race.WriteSlice(tmp[:])
  260. defer encBetterPools[pool].Put(tmp)
  261. return encodeSnappyBetterBlockAsm64K(dst, src, tmp)
  262. }
  263. if len(src) >= limit10B {
  264. const sz, pool = 81920, 0
  265. tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
  266. if !ok {
  267. tmp = &[sz]byte{}
  268. }
  269. race.WriteSlice(tmp[:])
  270. defer encBetterPools[pool].Put(tmp)
  271. return encodeSnappyBetterBlockAsm12B(dst, src, tmp)
  272. }
  273. if len(src) >= limit8B {
  274. const sz, pool = 20480, 1
  275. tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
  276. if !ok {
  277. tmp = &[sz]byte{}
  278. }
  279. race.WriteSlice(tmp[:])
  280. defer encBetterPools[pool].Put(tmp)
  281. return encodeSnappyBetterBlockAsm10B(dst, src, tmp)
  282. }
  283. if len(src) < minNonLiteralBlockSize {
  284. return 0
  285. }
  286. const sz, pool = 5120, 2
  287. tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
  288. if !ok {
  289. tmp = &[sz]byte{}
  290. }
  291. race.WriteSlice(tmp[:])
  292. defer encBetterPools[pool].Put(tmp)
  293. return encodeSnappyBetterBlockAsm8B(dst, src, tmp)
  294. }