enc_fast.go 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873
  1. // Copyright 2019+ Klaus Post. All rights reserved.
  2. // License information can be found in the LICENSE file.
  3. // Based on work by Yann Collet, released under BSD License.
  4. package zstd
  5. import (
  6. "fmt"
  7. )
  8. const (
  9. tableBits = 15 // Bits used in the table
  10. tableSize = 1 << tableBits // Size of the table
  11. tableShardCnt = 1 << (tableBits - dictShardBits) // Number of shards in the table
  12. tableShardSize = tableSize / tableShardCnt // Size of an individual shard
  13. tableFastHashLen = 6
  14. tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
  15. maxMatchLength = 131074
  16. )
  17. type tableEntry struct {
  18. val uint32
  19. offset int32
  20. }
  21. type fastEncoder struct {
  22. fastBase
  23. table [tableSize]tableEntry
  24. }
  25. type fastEncoderDict struct {
  26. fastEncoder
  27. dictTable []tableEntry
  28. tableShardDirty [tableShardCnt]bool
  29. allDirty bool
  30. }
  31. // Encode mimmics functionality in zstd_fast.c
  32. func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
  33. const (
  34. inputMargin = 8
  35. minNonLiteralBlockSize = 1 + 1 + inputMargin
  36. )
  37. // Protect against e.cur wraparound.
  38. for e.cur >= e.bufferReset-int32(len(e.hist)) {
  39. if len(e.hist) == 0 {
  40. for i := range e.table[:] {
  41. e.table[i] = tableEntry{}
  42. }
  43. e.cur = e.maxMatchOff
  44. break
  45. }
  46. // Shift down everything in the table that isn't already too far away.
  47. minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
  48. for i := range e.table[:] {
  49. v := e.table[i].offset
  50. if v < minOff {
  51. v = 0
  52. } else {
  53. v = v - e.cur + e.maxMatchOff
  54. }
  55. e.table[i].offset = v
  56. }
  57. e.cur = e.maxMatchOff
  58. break
  59. }
  60. s := e.addBlock(src)
  61. blk.size = len(src)
  62. if len(src) < minNonLiteralBlockSize {
  63. blk.extraLits = len(src)
  64. blk.literals = blk.literals[:len(src)]
  65. copy(blk.literals, src)
  66. return
  67. }
  68. // Override src
  69. src = e.hist
  70. sLimit := int32(len(src)) - inputMargin
  71. // stepSize is the number of bytes to skip on every main loop iteration.
  72. // It should be >= 2.
  73. const stepSize = 2
  74. // TEMPLATE
  75. const hashLog = tableBits
  76. // seems global, but would be nice to tweak.
  77. const kSearchStrength = 6
  78. // nextEmit is where in src the next emitLiteral should start from.
  79. nextEmit := s
  80. cv := load6432(src, s)
  81. // Relative offsets
  82. offset1 := int32(blk.recentOffsets[0])
  83. offset2 := int32(blk.recentOffsets[1])
  84. addLiterals := func(s *seq, until int32) {
  85. if until == nextEmit {
  86. return
  87. }
  88. blk.literals = append(blk.literals, src[nextEmit:until]...)
  89. s.litLen = uint32(until - nextEmit)
  90. }
  91. if debugEncoder {
  92. println("recent offsets:", blk.recentOffsets)
  93. }
  94. encodeLoop:
  95. for {
  96. // t will contain the match offset when we find one.
  97. // When existing the search loop, we have already checked 4 bytes.
  98. var t int32
  99. // We will not use repeat offsets across blocks.
  100. // By not using them for the first 3 matches
  101. canRepeat := len(blk.sequences) > 2
  102. for {
  103. if debugAsserts && canRepeat && offset1 == 0 {
  104. panic("offset0 was 0")
  105. }
  106. nextHash := hashLen(cv, hashLog, tableFastHashLen)
  107. nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
  108. candidate := e.table[nextHash]
  109. candidate2 := e.table[nextHash2]
  110. repIndex := s - offset1 + 2
  111. e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
  112. e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
  113. if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
  114. // Consider history as well.
  115. var seq seq
  116. length := 4 + e.matchlen(s+6, repIndex+4, src)
  117. seq.matchLen = uint32(length - zstdMinMatch)
  118. // We might be able to match backwards.
  119. // Extend as long as we can.
  120. start := s + 2
  121. // We end the search early, so we don't risk 0 literals
  122. // and have to do special offset treatment.
  123. startLimit := nextEmit + 1
  124. sMin := max(s-e.maxMatchOff, 0)
  125. for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
  126. repIndex--
  127. start--
  128. seq.matchLen++
  129. }
  130. addLiterals(&seq, start)
  131. // rep 0
  132. seq.offset = 1
  133. if debugSequences {
  134. println("repeat sequence", seq, "next s:", s)
  135. }
  136. blk.sequences = append(blk.sequences, seq)
  137. s += length + 2
  138. nextEmit = s
  139. if s >= sLimit {
  140. if debugEncoder {
  141. println("repeat ended", s, length)
  142. }
  143. break encodeLoop
  144. }
  145. cv = load6432(src, s)
  146. continue
  147. }
  148. coffset0 := s - (candidate.offset - e.cur)
  149. coffset1 := s - (candidate2.offset - e.cur) + 1
  150. if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
  151. // found a regular match
  152. t = candidate.offset - e.cur
  153. if debugAsserts && s <= t {
  154. panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
  155. }
  156. if debugAsserts && s-t > e.maxMatchOff {
  157. panic("s - t >e.maxMatchOff")
  158. }
  159. break
  160. }
  161. if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
  162. // found a regular match
  163. t = candidate2.offset - e.cur
  164. s++
  165. if debugAsserts && s <= t {
  166. panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
  167. }
  168. if debugAsserts && s-t > e.maxMatchOff {
  169. panic("s - t >e.maxMatchOff")
  170. }
  171. if debugAsserts && t < 0 {
  172. panic("t<0")
  173. }
  174. break
  175. }
  176. s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
  177. if s >= sLimit {
  178. break encodeLoop
  179. }
  180. cv = load6432(src, s)
  181. }
  182. // A 4-byte match has been found. We'll later see if more than 4 bytes.
  183. offset2 = offset1
  184. offset1 = s - t
  185. if debugAsserts && s <= t {
  186. panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
  187. }
  188. if debugAsserts && canRepeat && int(offset1) > len(src) {
  189. panic("invalid offset")
  190. }
  191. // Extend the 4-byte match as long as possible.
  192. l := e.matchlen(s+4, t+4, src) + 4
  193. // Extend backwards
  194. tMin := max(s-e.maxMatchOff, 0)
  195. for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
  196. s--
  197. t--
  198. l++
  199. }
  200. // Write our sequence.
  201. var seq seq
  202. seq.litLen = uint32(s - nextEmit)
  203. seq.matchLen = uint32(l - zstdMinMatch)
  204. if seq.litLen > 0 {
  205. blk.literals = append(blk.literals, src[nextEmit:s]...)
  206. }
  207. // Don't use repeat offsets
  208. seq.offset = uint32(s-t) + 3
  209. s += l
  210. if debugSequences {
  211. println("sequence", seq, "next s:", s)
  212. }
  213. blk.sequences = append(blk.sequences, seq)
  214. nextEmit = s
  215. if s >= sLimit {
  216. break encodeLoop
  217. }
  218. cv = load6432(src, s)
  219. // Check offset 2
  220. if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
  221. // We have at least 4 byte match.
  222. // No need to check backwards. We come straight from a match
  223. l := 4 + e.matchlen(s+4, o2+4, src)
  224. // Store this, since we have it.
  225. nextHash := hashLen(cv, hashLog, tableFastHashLen)
  226. e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
  227. seq.matchLen = uint32(l) - zstdMinMatch
  228. seq.litLen = 0
  229. // Since litlen is always 0, this is offset 1.
  230. seq.offset = 1
  231. s += l
  232. nextEmit = s
  233. if debugSequences {
  234. println("sequence", seq, "next s:", s)
  235. }
  236. blk.sequences = append(blk.sequences, seq)
  237. // Swap offset 1 and 2.
  238. offset1, offset2 = offset2, offset1
  239. if s >= sLimit {
  240. break encodeLoop
  241. }
  242. // Prepare next loop.
  243. cv = load6432(src, s)
  244. }
  245. }
  246. if int(nextEmit) < len(src) {
  247. blk.literals = append(blk.literals, src[nextEmit:]...)
  248. blk.extraLits = len(src) - int(nextEmit)
  249. }
  250. blk.recentOffsets[0] = uint32(offset1)
  251. blk.recentOffsets[1] = uint32(offset2)
  252. if debugEncoder {
  253. println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
  254. }
  255. }
  256. // EncodeNoHist will encode a block with no history and no following blocks.
  257. // Most notable difference is that src will not be copied for history and
  258. // we do not need to check for max match length.
  259. func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
  260. const (
  261. inputMargin = 8
  262. minNonLiteralBlockSize = 1 + 1 + inputMargin
  263. )
  264. if debugEncoder {
  265. if len(src) > maxCompressedBlockSize {
  266. panic("src too big")
  267. }
  268. }
  269. // Protect against e.cur wraparound.
  270. if e.cur >= e.bufferReset {
  271. for i := range e.table[:] {
  272. e.table[i] = tableEntry{}
  273. }
  274. e.cur = e.maxMatchOff
  275. }
  276. s := int32(0)
  277. blk.size = len(src)
  278. if len(src) < minNonLiteralBlockSize {
  279. blk.extraLits = len(src)
  280. blk.literals = blk.literals[:len(src)]
  281. copy(blk.literals, src)
  282. return
  283. }
  284. sLimit := int32(len(src)) - inputMargin
  285. // stepSize is the number of bytes to skip on every main loop iteration.
  286. // It should be >= 2.
  287. const stepSize = 2
  288. // TEMPLATE
  289. const hashLog = tableBits
  290. // seems global, but would be nice to tweak.
  291. const kSearchStrength = 6
  292. // nextEmit is where in src the next emitLiteral should start from.
  293. nextEmit := s
  294. cv := load6432(src, s)
  295. // Relative offsets
  296. offset1 := int32(blk.recentOffsets[0])
  297. offset2 := int32(blk.recentOffsets[1])
  298. addLiterals := func(s *seq, until int32) {
  299. if until == nextEmit {
  300. return
  301. }
  302. blk.literals = append(blk.literals, src[nextEmit:until]...)
  303. s.litLen = uint32(until - nextEmit)
  304. }
  305. if debugEncoder {
  306. println("recent offsets:", blk.recentOffsets)
  307. }
  308. encodeLoop:
  309. for {
  310. // t will contain the match offset when we find one.
  311. // When existing the search loop, we have already checked 4 bytes.
  312. var t int32
  313. // We will not use repeat offsets across blocks.
  314. // By not using them for the first 3 matches
  315. for {
  316. nextHash := hashLen(cv, hashLog, tableFastHashLen)
  317. nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
  318. candidate := e.table[nextHash]
  319. candidate2 := e.table[nextHash2]
  320. repIndex := s - offset1 + 2
  321. e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
  322. e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
  323. if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) {
  324. // Consider history as well.
  325. var seq seq
  326. length := 4 + e.matchlen(s+6, repIndex+4, src)
  327. seq.matchLen = uint32(length - zstdMinMatch)
  328. // We might be able to match backwards.
  329. // Extend as long as we can.
  330. start := s + 2
  331. // We end the search early, so we don't risk 0 literals
  332. // and have to do special offset treatment.
  333. startLimit := nextEmit + 1
  334. sMin := max(s-e.maxMatchOff, 0)
  335. for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] {
  336. repIndex--
  337. start--
  338. seq.matchLen++
  339. }
  340. addLiterals(&seq, start)
  341. // rep 0
  342. seq.offset = 1
  343. if debugSequences {
  344. println("repeat sequence", seq, "next s:", s)
  345. }
  346. blk.sequences = append(blk.sequences, seq)
  347. s += length + 2
  348. nextEmit = s
  349. if s >= sLimit {
  350. if debugEncoder {
  351. println("repeat ended", s, length)
  352. }
  353. break encodeLoop
  354. }
  355. cv = load6432(src, s)
  356. continue
  357. }
  358. coffset0 := s - (candidate.offset - e.cur)
  359. coffset1 := s - (candidate2.offset - e.cur) + 1
  360. if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
  361. // found a regular match
  362. t = candidate.offset - e.cur
  363. if debugAsserts && s <= t {
  364. panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
  365. }
  366. if debugAsserts && s-t > e.maxMatchOff {
  367. panic("s - t >e.maxMatchOff")
  368. }
  369. if debugAsserts && t < 0 {
  370. panic(fmt.Sprintf("t (%d) < 0, candidate.offset: %d, e.cur: %d, coffset0: %d, e.maxMatchOff: %d", t, candidate.offset, e.cur, coffset0, e.maxMatchOff))
  371. }
  372. break
  373. }
  374. if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
  375. // found a regular match
  376. t = candidate2.offset - e.cur
  377. s++
  378. if debugAsserts && s <= t {
  379. panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
  380. }
  381. if debugAsserts && s-t > e.maxMatchOff {
  382. panic("s - t >e.maxMatchOff")
  383. }
  384. if debugAsserts && t < 0 {
  385. panic("t<0")
  386. }
  387. break
  388. }
  389. s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
  390. if s >= sLimit {
  391. break encodeLoop
  392. }
  393. cv = load6432(src, s)
  394. }
  395. // A 4-byte match has been found. We'll later see if more than 4 bytes.
  396. offset2 = offset1
  397. offset1 = s - t
  398. if debugAsserts && s <= t {
  399. panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
  400. }
  401. if debugAsserts && t < 0 {
  402. panic(fmt.Sprintf("t (%d) < 0 ", t))
  403. }
  404. // Extend the 4-byte match as long as possible.
  405. l := e.matchlen(s+4, t+4, src) + 4
  406. // Extend backwards
  407. tMin := max(s-e.maxMatchOff, 0)
  408. for t > tMin && s > nextEmit && src[t-1] == src[s-1] {
  409. s--
  410. t--
  411. l++
  412. }
  413. // Write our sequence.
  414. var seq seq
  415. seq.litLen = uint32(s - nextEmit)
  416. seq.matchLen = uint32(l - zstdMinMatch)
  417. if seq.litLen > 0 {
  418. blk.literals = append(blk.literals, src[nextEmit:s]...)
  419. }
  420. // Don't use repeat offsets
  421. seq.offset = uint32(s-t) + 3
  422. s += l
  423. if debugSequences {
  424. println("sequence", seq, "next s:", s)
  425. }
  426. blk.sequences = append(blk.sequences, seq)
  427. nextEmit = s
  428. if s >= sLimit {
  429. break encodeLoop
  430. }
  431. cv = load6432(src, s)
  432. // Check offset 2
  433. if o2 := s - offset2; len(blk.sequences) > 2 && load3232(src, o2) == uint32(cv) {
  434. // We have at least 4 byte match.
  435. // No need to check backwards. We come straight from a match
  436. l := 4 + e.matchlen(s+4, o2+4, src)
  437. // Store this, since we have it.
  438. nextHash := hashLen(cv, hashLog, tableFastHashLen)
  439. e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
  440. seq.matchLen = uint32(l) - zstdMinMatch
  441. seq.litLen = 0
  442. // Since litlen is always 0, this is offset 1.
  443. seq.offset = 1
  444. s += l
  445. nextEmit = s
  446. if debugSequences {
  447. println("sequence", seq, "next s:", s)
  448. }
  449. blk.sequences = append(blk.sequences, seq)
  450. // Swap offset 1 and 2.
  451. offset1, offset2 = offset2, offset1
  452. if s >= sLimit {
  453. break encodeLoop
  454. }
  455. // Prepare next loop.
  456. cv = load6432(src, s)
  457. }
  458. }
  459. if int(nextEmit) < len(src) {
  460. blk.literals = append(blk.literals, src[nextEmit:]...)
  461. blk.extraLits = len(src) - int(nextEmit)
  462. }
  463. if debugEncoder {
  464. println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
  465. }
  466. // We do not store history, so we must offset e.cur to avoid false matches for next user.
  467. if e.cur < e.bufferReset {
  468. e.cur += int32(len(src))
  469. }
  470. }
  471. // Encode will encode the content, with a dictionary if initialized for it.
  472. func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) {
  473. const (
  474. inputMargin = 8
  475. minNonLiteralBlockSize = 1 + 1 + inputMargin
  476. )
  477. if e.allDirty || len(src) > 32<<10 {
  478. e.fastEncoder.Encode(blk, src)
  479. e.allDirty = true
  480. return
  481. }
  482. // Protect against e.cur wraparound.
  483. for e.cur >= e.bufferReset-int32(len(e.hist)) {
  484. if len(e.hist) == 0 {
  485. e.table = [tableSize]tableEntry{}
  486. e.cur = e.maxMatchOff
  487. break
  488. }
  489. // Shift down everything in the table that isn't already too far away.
  490. minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
  491. for i := range e.table[:] {
  492. v := e.table[i].offset
  493. if v < minOff {
  494. v = 0
  495. } else {
  496. v = v - e.cur + e.maxMatchOff
  497. }
  498. e.table[i].offset = v
  499. }
  500. e.cur = e.maxMatchOff
  501. break
  502. }
  503. s := e.addBlock(src)
  504. blk.size = len(src)
  505. if len(src) < minNonLiteralBlockSize {
  506. blk.extraLits = len(src)
  507. blk.literals = blk.literals[:len(src)]
  508. copy(blk.literals, src)
  509. return
  510. }
  511. // Override src
  512. src = e.hist
  513. sLimit := int32(len(src)) - inputMargin
  514. // stepSize is the number of bytes to skip on every main loop iteration.
  515. // It should be >= 2.
  516. const stepSize = 2
  517. // TEMPLATE
  518. const hashLog = tableBits
  519. // seems global, but would be nice to tweak.
  520. const kSearchStrength = 7
  521. // nextEmit is where in src the next emitLiteral should start from.
  522. nextEmit := s
  523. cv := load6432(src, s)
  524. // Relative offsets
  525. offset1 := int32(blk.recentOffsets[0])
  526. offset2 := int32(blk.recentOffsets[1])
  527. addLiterals := func(s *seq, until int32) {
  528. if until == nextEmit {
  529. return
  530. }
  531. blk.literals = append(blk.literals, src[nextEmit:until]...)
  532. s.litLen = uint32(until - nextEmit)
  533. }
  534. if debugEncoder {
  535. println("recent offsets:", blk.recentOffsets)
  536. }
  537. encodeLoop:
  538. for {
  539. // t will contain the match offset when we find one.
  540. // When existing the search loop, we have already checked 4 bytes.
  541. var t int32
  542. // We will not use repeat offsets across blocks.
  543. // By not using them for the first 3 matches
  544. canRepeat := len(blk.sequences) > 2
  545. for {
  546. if debugAsserts && canRepeat && offset1 == 0 {
  547. panic("offset0 was 0")
  548. }
  549. nextHash := hashLen(cv, hashLog, tableFastHashLen)
  550. nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
  551. candidate := e.table[nextHash]
  552. candidate2 := e.table[nextHash2]
  553. repIndex := s - offset1 + 2
  554. e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
  555. e.markShardDirty(nextHash)
  556. e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
  557. e.markShardDirty(nextHash2)
  558. if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
  559. // Consider history as well.
  560. var seq seq
  561. length := 4 + e.matchlen(s+6, repIndex+4, src)
  562. seq.matchLen = uint32(length - zstdMinMatch)
  563. // We might be able to match backwards.
  564. // Extend as long as we can.
  565. start := s + 2
  566. // We end the search early, so we don't risk 0 literals
  567. // and have to do special offset treatment.
  568. startLimit := nextEmit + 1
  569. sMin := max(s-e.maxMatchOff, 0)
  570. for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
  571. repIndex--
  572. start--
  573. seq.matchLen++
  574. }
  575. addLiterals(&seq, start)
  576. // rep 0
  577. seq.offset = 1
  578. if debugSequences {
  579. println("repeat sequence", seq, "next s:", s)
  580. }
  581. blk.sequences = append(blk.sequences, seq)
  582. s += length + 2
  583. nextEmit = s
  584. if s >= sLimit {
  585. if debugEncoder {
  586. println("repeat ended", s, length)
  587. }
  588. break encodeLoop
  589. }
  590. cv = load6432(src, s)
  591. continue
  592. }
  593. coffset0 := s - (candidate.offset - e.cur)
  594. coffset1 := s - (candidate2.offset - e.cur) + 1
  595. if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
  596. // found a regular match
  597. t = candidate.offset - e.cur
  598. if debugAsserts && s <= t {
  599. panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
  600. }
  601. if debugAsserts && s-t > e.maxMatchOff {
  602. panic("s - t >e.maxMatchOff")
  603. }
  604. break
  605. }
  606. if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
  607. // found a regular match
  608. t = candidate2.offset - e.cur
  609. s++
  610. if debugAsserts && s <= t {
  611. panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
  612. }
  613. if debugAsserts && s-t > e.maxMatchOff {
  614. panic("s - t >e.maxMatchOff")
  615. }
  616. if debugAsserts && t < 0 {
  617. panic("t<0")
  618. }
  619. break
  620. }
  621. s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
  622. if s >= sLimit {
  623. break encodeLoop
  624. }
  625. cv = load6432(src, s)
  626. }
  627. // A 4-byte match has been found. We'll later see if more than 4 bytes.
  628. offset2 = offset1
  629. offset1 = s - t
  630. if debugAsserts && s <= t {
  631. panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
  632. }
  633. if debugAsserts && canRepeat && int(offset1) > len(src) {
  634. panic("invalid offset")
  635. }
  636. // Extend the 4-byte match as long as possible.
  637. l := e.matchlen(s+4, t+4, src) + 4
  638. // Extend backwards
  639. tMin := max(s-e.maxMatchOff, 0)
  640. for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
  641. s--
  642. t--
  643. l++
  644. }
  645. // Write our sequence.
  646. var seq seq
  647. seq.litLen = uint32(s - nextEmit)
  648. seq.matchLen = uint32(l - zstdMinMatch)
  649. if seq.litLen > 0 {
  650. blk.literals = append(blk.literals, src[nextEmit:s]...)
  651. }
  652. // Don't use repeat offsets
  653. seq.offset = uint32(s-t) + 3
  654. s += l
  655. if debugSequences {
  656. println("sequence", seq, "next s:", s)
  657. }
  658. blk.sequences = append(blk.sequences, seq)
  659. nextEmit = s
  660. if s >= sLimit {
  661. break encodeLoop
  662. }
  663. cv = load6432(src, s)
  664. // Check offset 2
  665. if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
  666. // We have at least 4 byte match.
  667. // No need to check backwards. We come straight from a match
  668. l := 4 + e.matchlen(s+4, o2+4, src)
  669. // Store this, since we have it.
  670. nextHash := hashLen(cv, hashLog, tableFastHashLen)
  671. e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
  672. e.markShardDirty(nextHash)
  673. seq.matchLen = uint32(l) - zstdMinMatch
  674. seq.litLen = 0
  675. // Since litlen is always 0, this is offset 1.
  676. seq.offset = 1
  677. s += l
  678. nextEmit = s
  679. if debugSequences {
  680. println("sequence", seq, "next s:", s)
  681. }
  682. blk.sequences = append(blk.sequences, seq)
  683. // Swap offset 1 and 2.
  684. offset1, offset2 = offset2, offset1
  685. if s >= sLimit {
  686. break encodeLoop
  687. }
  688. // Prepare next loop.
  689. cv = load6432(src, s)
  690. }
  691. }
  692. if int(nextEmit) < len(src) {
  693. blk.literals = append(blk.literals, src[nextEmit:]...)
  694. blk.extraLits = len(src) - int(nextEmit)
  695. }
  696. blk.recentOffsets[0] = uint32(offset1)
  697. blk.recentOffsets[1] = uint32(offset2)
  698. if debugEncoder {
  699. println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
  700. }
  701. }
  702. // ResetDict will reset and set a dictionary if not nil
  703. func (e *fastEncoder) Reset(d *dict, singleBlock bool) {
  704. e.resetBase(d, singleBlock)
  705. if d != nil {
  706. panic("fastEncoder: Reset with dict")
  707. }
  708. }
  709. // ResetDict will reset and set a dictionary if not nil
  710. func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
  711. e.resetBase(d, singleBlock)
  712. if d == nil {
  713. return
  714. }
  715. // Init or copy dict table
  716. if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
  717. if len(e.dictTable) != len(e.table) {
  718. e.dictTable = make([]tableEntry, len(e.table))
  719. }
  720. if true {
  721. end := e.maxMatchOff + int32(len(d.content)) - 8
  722. for i := e.maxMatchOff; i < end; i += 2 {
  723. const hashLog = tableBits
  724. cv := load6432(d.content, i-e.maxMatchOff)
  725. nextHash := hashLen(cv, hashLog, tableFastHashLen) // 0 -> 6
  726. nextHash1 := hashLen(cv>>8, hashLog, tableFastHashLen) // 1 -> 7
  727. e.dictTable[nextHash] = tableEntry{
  728. val: uint32(cv),
  729. offset: i,
  730. }
  731. e.dictTable[nextHash1] = tableEntry{
  732. val: uint32(cv >> 8),
  733. offset: i + 1,
  734. }
  735. }
  736. }
  737. e.lastDictID = d.id
  738. e.allDirty = true
  739. }
  740. e.cur = e.maxMatchOff
  741. dirtyShardCnt := 0
  742. if !e.allDirty {
  743. for i := range e.tableShardDirty {
  744. if e.tableShardDirty[i] {
  745. dirtyShardCnt++
  746. }
  747. }
  748. }
  749. const shardCnt = tableShardCnt
  750. const shardSize = tableShardSize
  751. if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
  752. //copy(e.table[:], e.dictTable)
  753. e.table = *(*[tableSize]tableEntry)(e.dictTable)
  754. for i := range e.tableShardDirty {
  755. e.tableShardDirty[i] = false
  756. }
  757. e.allDirty = false
  758. return
  759. }
  760. for i := range e.tableShardDirty {
  761. if !e.tableShardDirty[i] {
  762. continue
  763. }
  764. //copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
  765. *(*[shardSize]tableEntry)(e.table[i*shardSize:]) = *(*[shardSize]tableEntry)(e.dictTable[i*shardSize:])
  766. e.tableShardDirty[i] = false
  767. }
  768. e.allDirty = false
  769. }
  770. func (e *fastEncoderDict) markAllShardsDirty() {
  771. e.allDirty = true
  772. }
  773. func (e *fastEncoderDict) markShardDirty(entryNum uint32) {
  774. e.tableShardDirty[entryNum/tableShardSize] = true
  775. }