encoder_options.go 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377
  1. package zstd
  2. import (
  3. "errors"
  4. "fmt"
  5. "math"
  6. "math/bits"
  7. "runtime"
  8. "strings"
  9. )
// EOption is an option for creating an encoder.
// An option is a function that is applied to the accumulated encoderOptions
// and returns an error when the requested setting is rejected.
type EOption func(*encoderOptions) error
// encoderOptions retains accumulated state of multiple options.
type encoderOptions struct {
	// resetOpt, when true, makes the options documented as "cannot be changed
	// with ResetWithOptions" return an error if they request a new value.
	// NOTE(review): presumably set by the reset path; the setter is not in this file.
	resetOpt bool
	// concurrent is the maximum number of encoders to run concurrently.
	concurrent int
	// level is the selected compression level; it picks the encoder implementation.
	level EncoderLevel
	// single is the explicit "single segment" choice; nil means not specified.
	single *bool
	// pad is the size multiple set by WithEncoderPadding (0 when 1 was requested).
	pad int
	// blockSize is the block size; it is lowered to windowSize by WithWindowSize
	// and set to 1<<16 by WithEncoderLevel(SpeedFastest) unless customBlockSize is set.
	blockSize int
	// windowSize is the maximum allowed back-reference distance.
	windowSize int
	// crc controls whether a CRC value is added to the output.
	crc bool
	// fullZero controls whether 0 length input is encoded as a full frame.
	fullZero bool
	// noEntropy controls whether entropy compression of literals is always skipped.
	noEntropy bool
	// allLitEntropy controls whether entropy compression is applied when no matches are found.
	allLitEntropy bool
	// customWindow records that WithWindowSize was applied, so WithEncoderLevel
	// leaves windowSize (and blockSize) alone.
	customWindow bool
	// customALEntropy records that WithAllLitEntropyCompression was applied, so
	// WithEncoderLevel leaves allLitEntropy alone.
	customALEntropy bool
	// customBlockSize records that WithWindowSize reduced blockSize, so
	// WithEncoderLevel leaves blockSize alone.
	customBlockSize bool
	// lowMem is the WithLowerEncoderMem setting handed to the encoder implementation.
	lowMem bool
	// dict is the dictionary to use, or nil for none.
	dict *dict
}
  31. func (o *encoderOptions) setDefault() {
  32. *o = encoderOptions{
  33. concurrent: runtime.GOMAXPROCS(0),
  34. crc: true,
  35. single: nil,
  36. blockSize: maxCompressedBlockSize,
  37. windowSize: 8 << 20,
  38. level: SpeedDefault,
  39. allLitEntropy: false,
  40. lowMem: false,
  41. }
  42. }
  43. // encoder returns an encoder with the selected options.
  44. func (o encoderOptions) encoder() encoder {
  45. switch o.level {
  46. case SpeedFastest:
  47. if o.dict != nil {
  48. return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
  49. }
  50. return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
  51. case SpeedDefault:
  52. if o.dict != nil {
  53. return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}}
  54. }
  55. return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
  56. case SpeedBetterCompression:
  57. if o.dict != nil {
  58. return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}}
  59. }
  60. return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
  61. case SpeedBestCompression:
  62. return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), bufferReset: math.MaxInt32 - int32(o.windowSize*2), lowMem: o.lowMem}}
  63. }
  64. panic("unknown compression level")
  65. }
  66. // WithEncoderCRC will add CRC value to output.
  67. // Output will be 4 bytes larger.
  68. // Can be changed with ResetWithOptions.
  69. func WithEncoderCRC(b bool) EOption {
  70. return func(o *encoderOptions) error { o.crc = b; return nil }
  71. }
  72. // WithEncoderConcurrency will set the concurrency,
  73. // meaning the maximum number of encoders to run concurrently.
  74. // The value supplied must be at least 0.
  75. // When a value of 0 is provided GOMAXPROCS will be used.
  76. // For streams, setting a value of 1 will disable async compression.
  77. // By default this will be set to GOMAXPROCS.
  78. // Cannot be changed with ResetWithOptions.
  79. func WithEncoderConcurrency(n int) EOption {
  80. return func(o *encoderOptions) error {
  81. if n < 0 {
  82. return errors.New("concurrency must at least 0")
  83. }
  84. if n == 0 {
  85. n = runtime.GOMAXPROCS(0)
  86. }
  87. if o.resetOpt && n != o.concurrent {
  88. return errors.New("WithEncoderConcurrency cannot be changed on Reset")
  89. }
  90. o.concurrent = n
  91. return nil
  92. }
  93. }
  94. // WithWindowSize will set the maximum allowed back-reference distance.
  95. // The value must be a power of two between MinWindowSize and MaxWindowSize.
  96. // A larger value will enable better compression but allocate more memory and,
  97. // for above-default values, take considerably longer.
  98. // The default value is determined by the compression level and max 8MB.
  99. // Cannot be changed with ResetWithOptions.
  100. func WithWindowSize(n int) EOption {
  101. return func(o *encoderOptions) error {
  102. switch {
  103. case n < MinWindowSize:
  104. return fmt.Errorf("window size must be at least %d", MinWindowSize)
  105. case n > MaxWindowSize:
  106. return fmt.Errorf("window size must be at most %d", MaxWindowSize)
  107. case (n & (n - 1)) != 0:
  108. return errors.New("window size must be a power of 2")
  109. }
  110. if o.resetOpt && n != o.windowSize {
  111. return errors.New("WithWindowSize cannot be changed on Reset")
  112. }
  113. o.windowSize = n
  114. o.customWindow = true
  115. if o.blockSize > o.windowSize {
  116. o.blockSize = o.windowSize
  117. o.customBlockSize = true
  118. }
  119. return nil
  120. }
  121. }
  122. // WithEncoderPadding will add padding to all output so the size will be a multiple of n.
  123. // This can be used to obfuscate the exact output size or make blocks of a certain size.
  124. // The contents will be a skippable frame, so it will be invisible by the decoder.
  125. // n must be > 0 and <= 1GB, 1<<30 bytes.
  126. // The padded area will be filled with data from crypto/rand.Reader.
  127. // If `EncodeAll` is used with data already in the destination, the total size will be multiple of this.
  128. // Can be changed with ResetWithOptions.
  129. func WithEncoderPadding(n int) EOption {
  130. return func(o *encoderOptions) error {
  131. if n <= 0 {
  132. return fmt.Errorf("padding must be at least 1")
  133. }
  134. // No need to waste our time.
  135. if n == 1 {
  136. n = 0
  137. }
  138. if n > 1<<30 {
  139. return fmt.Errorf("padding must less than 1GB (1<<30 bytes) ")
  140. }
  141. o.pad = n
  142. return nil
  143. }
  144. }
// EncoderLevel predefines encoder compression levels.
// Only use the constants made available, since the actual mapping
// of these values is very likely to change and your compression could change
// unpredictably when upgrading the library.
type EncoderLevel int

const (
	// speedNotSet is the zero value; it does not name a usable level and
	// marks the lower bound of the valid range.
	speedNotSet EncoderLevel = iota

	// SpeedFastest will choose the fastest reasonable compression.
	// This is roughly equivalent to the fastest Zstandard mode.
	SpeedFastest

	// SpeedDefault is the default "pretty fast" compression option.
	// This is roughly equivalent to the default Zstandard mode (level 3).
	SpeedDefault

	// SpeedBetterCompression will yield better compression than the default.
	// Currently it is about zstd level 7-8 with ~ 2x-3x the default CPU usage.
	// By using this, notice that CPU usage may go up in the future.
	SpeedBetterCompression

	// SpeedBestCompression will choose the best available compression option.
	// This will offer the best compression no matter the CPU cost.
	SpeedBestCompression

	// speedLast should be kept as the last actual compression option.
	// This is not for external usage, but is used to keep track of the valid options.
	speedLast
)
  169. // EncoderLevelFromString will convert a string representation of an encoding level back
  170. // to a compression level. The compare is not case sensitive.
  171. // If the string wasn't recognized, (false, SpeedDefault) will be returned.
  172. func EncoderLevelFromString(s string) (bool, EncoderLevel) {
  173. for l := speedNotSet + 1; l < speedLast; l++ {
  174. if strings.EqualFold(s, l.String()) {
  175. return true, l
  176. }
  177. }
  178. return false, SpeedDefault
  179. }
  180. // EncoderLevelFromZstd will return an encoder level that closest matches the compression
  181. // ratio of a specific zstd compression level.
  182. // Many input values will provide the same compression level.
  183. func EncoderLevelFromZstd(level int) EncoderLevel {
  184. switch {
  185. case level < 3:
  186. return SpeedFastest
  187. case level >= 3 && level < 6:
  188. return SpeedDefault
  189. case level >= 6 && level < 10:
  190. return SpeedBetterCompression
  191. default:
  192. return SpeedBestCompression
  193. }
  194. }
  195. // String provides a string representation of the compression level.
  196. func (e EncoderLevel) String() string {
  197. switch e {
  198. case SpeedFastest:
  199. return "fastest"
  200. case SpeedDefault:
  201. return "default"
  202. case SpeedBetterCompression:
  203. return "better"
  204. case SpeedBestCompression:
  205. return "best"
  206. default:
  207. return "invalid"
  208. }
  209. }
  210. // WithEncoderLevel specifies a predefined compression level.
  211. // Cannot be changed with ResetWithOptions.
  212. func WithEncoderLevel(l EncoderLevel) EOption {
  213. return func(o *encoderOptions) error {
  214. switch {
  215. case l <= speedNotSet || l >= speedLast:
  216. return fmt.Errorf("unknown encoder level")
  217. }
  218. if o.resetOpt && l != o.level {
  219. return errors.New("WithEncoderLevel cannot be changed on Reset")
  220. }
  221. o.level = l
  222. if !o.customWindow {
  223. switch o.level {
  224. case SpeedFastest:
  225. o.windowSize = 4 << 20
  226. if !o.customBlockSize {
  227. o.blockSize = 1 << 16
  228. }
  229. case SpeedDefault:
  230. o.windowSize = 8 << 20
  231. case SpeedBetterCompression:
  232. o.windowSize = 8 << 20
  233. case SpeedBestCompression:
  234. o.windowSize = 8 << 20
  235. }
  236. }
  237. if !o.customALEntropy {
  238. o.allLitEntropy = l > SpeedDefault
  239. }
  240. return nil
  241. }
  242. }
  243. // WithZeroFrames will encode 0 length input as full frames.
  244. // This can be needed for compatibility with zstandard usage,
  245. // but is not needed for this package.
  246. // Can be changed with ResetWithOptions.
  247. func WithZeroFrames(b bool) EOption {
  248. return func(o *encoderOptions) error {
  249. o.fullZero = b
  250. return nil
  251. }
  252. }
  253. // WithAllLitEntropyCompression will apply entropy compression if no matches are found.
  254. // Disabling this will skip incompressible data faster, but in cases with no matches but
  255. // skewed character distribution compression is lost.
  256. // Default value depends on the compression level selected.
  257. // Can be changed with ResetWithOptions.
  258. func WithAllLitEntropyCompression(b bool) EOption {
  259. return func(o *encoderOptions) error {
  260. o.customALEntropy = true
  261. o.allLitEntropy = b
  262. return nil
  263. }
  264. }
  265. // WithNoEntropyCompression will always skip entropy compression of literals.
  266. // This can be useful if content has matches, but unlikely to benefit from entropy
  267. // compression. Usually the slight speed improvement is not worth enabling this.
  268. // Can be changed with ResetWithOptions.
  269. func WithNoEntropyCompression(b bool) EOption {
  270. return func(o *encoderOptions) error {
  271. o.noEntropy = b
  272. return nil
  273. }
  274. }
  275. // WithSingleSegment will set the "single segment" flag when EncodeAll is used.
  276. // If this flag is set, data must be regenerated within a single continuous memory segment.
  277. // In this case, Window_Descriptor byte is skipped, but Frame_Content_Size is necessarily present.
  278. // As a consequence, the decoder must allocate a memory segment of size equal or larger than size of your content.
  279. // In order to preserve the decoder from unreasonable memory requirements,
  280. // a decoder is allowed to reject a compressed frame which requests a memory size beyond decoder's authorized range.
  281. // For broader compatibility, decoders are recommended to support memory sizes of at least 8 MB.
  282. // This is only a recommendation, each decoder is free to support higher or lower limits, depending on local limitations.
  283. // If this is not specified, block encodes will automatically choose this based on the input size and the window size.
  284. // This setting has no effect on streamed encodes.
  285. // Can be changed with ResetWithOptions.
  286. func WithSingleSegment(b bool) EOption {
  287. return func(o *encoderOptions) error {
  288. o.single = &b
  289. return nil
  290. }
  291. }
  292. // WithLowerEncoderMem will trade in some memory cases trade less memory usage for
  293. // slower encoding speed.
  294. // This will not change the window size which is the primary function for reducing
  295. // memory usage. See WithWindowSize.
  296. // Cannot be changed with ResetWithOptions.
  297. func WithLowerEncoderMem(b bool) EOption {
  298. return func(o *encoderOptions) error {
  299. if o.resetOpt && b != o.lowMem {
  300. return errors.New("WithLowerEncoderMem cannot be changed on Reset")
  301. }
  302. o.lowMem = b
  303. return nil
  304. }
  305. }
  306. // WithEncoderDict allows to register a dictionary that will be used for the encode.
  307. //
  308. // The slice dict must be in the [dictionary format] produced by
  309. // "zstd --train" from the Zstandard reference implementation.
  310. //
  311. // The encoder *may* choose to use no dictionary instead for certain payloads.
  312. // Can be changed with ResetWithOptions.
  313. //
  314. // [dictionary format]: https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary-format
  315. func WithEncoderDict(dict []byte) EOption {
  316. return func(o *encoderOptions) error {
  317. d, err := loadDict(dict)
  318. if err != nil {
  319. return err
  320. }
  321. o.dict = d
  322. return nil
  323. }
  324. }
  325. // WithEncoderDictRaw registers a dictionary that may be used by the encoder.
  326. //
  327. // The slice content may contain arbitrary data. It will be used as an initial
  328. // history.
  329. // Can be changed with ResetWithOptions.
  330. func WithEncoderDictRaw(id uint32, content []byte) EOption {
  331. return func(o *encoderOptions) error {
  332. if bits.UintSize > 32 && uint(len(content)) > dictMaxLength {
  333. return fmt.Errorf("dictionary of size %d > 2GiB too large", len(content))
  334. }
  335. o.dict = &dict{id: id, content: content, offsets: [3]int{1, 4, 8}}
  336. return nil
  337. }
  338. }
  339. // WithEncoderDictDelete clears the dictionary, so no dictionary will be used.
  340. // Should be used with ResetWithOptions.
  341. func WithEncoderDictDelete() EOption {
  342. return func(o *encoderOptions) error {
  343. o.dict = nil
  344. return nil
  345. }
  346. }