encode_all.go 37 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480
  1. // Copyright 2016 The Snappy-Go Authors. All rights reserved.
  2. // Copyright (c) 2019 Klaus Post. All rights reserved.
  3. // Use of this source code is governed by a BSD-style
  4. // license that can be found in the LICENSE file.
  5. package s2
  6. import (
  7. "bytes"
  8. "encoding/binary"
  9. "fmt"
  10. "math/bits"
  11. "github.com/klauspost/compress/internal/le"
  12. )
  13. func load32(b []byte, i int) uint32 {
  14. return le.Load32(b, i)
  15. }
  16. func load64(b []byte, i int) uint64 {
  17. return le.Load64(b, i)
  18. }
  19. // hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits.
  20. // Preferably h should be a constant and should always be <64.
  21. func hash6(u uint64, h uint8) uint32 {
  22. const prime6bytes = 227718039650203
  23. return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & 63))
  24. }
  25. func encodeGo(dst, src []byte) []byte {
  26. if n := MaxEncodedLen(len(src)); n < 0 {
  27. panic(ErrTooLarge)
  28. } else if len(dst) < n {
  29. dst = make([]byte, n)
  30. }
  31. // The block starts with the varint-encoded length of the decompressed bytes.
  32. d := binary.PutUvarint(dst, uint64(len(src)))
  33. if len(src) == 0 {
  34. return dst[:d]
  35. }
  36. if len(src) < minNonLiteralBlockSize {
  37. d += emitLiteral(dst[d:], src)
  38. return dst[:d]
  39. }
  40. var n int
  41. if len(src) < 64<<10 {
  42. n = encodeBlockGo64K(dst[d:], src)
  43. } else {
  44. n = encodeBlockGo(dst[d:], src)
  45. }
  46. if n > 0 {
  47. d += n
  48. return dst[:d]
  49. }
  50. // Not compressible
  51. d += emitLiteral(dst[d:], src)
  52. return dst[:d]
  53. }
  54. // encodeBlockGo encodes a non-empty src to a guaranteed-large-enough dst. It
  55. // assumes that the varint-encoded length of the decompressed bytes has already
  56. // been written.
  57. //
  58. // It also assumes that:
  59. //
  60. // len(dst) >= MaxEncodedLen(len(src)) &&
  61. // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
  62. func encodeBlockGo(dst, src []byte) (d int) {
  63. // Initialize the hash table.
  64. const (
  65. tableBits = 14
  66. maxTableSize = 1 << tableBits
  67. debug = false
  68. )
  69. var table [maxTableSize]uint32
  70. // sLimit is when to stop looking for offset/length copies. The inputMargin
  71. // lets us use a fast path for emitLiteral in the main loop, while we are
  72. // looking for copies.
  73. sLimit := len(src) - inputMargin
  74. // Bail if we can't compress to at least this.
  75. dstLimit := len(src) - len(src)>>5 - 5
  76. // nextEmit is where in src the next emitLiteral should start from.
  77. nextEmit := 0
  78. // The encoded form must start with a literal, as there are no previous
  79. // bytes to copy, so we start looking for hash matches at s == 1.
  80. s := 1
  81. cv := load64(src, s)
  82. // We search for a repeat at -1, but don't output repeats when nextEmit == 0
  83. repeat := 1
  84. for {
  85. candidate := 0
  86. for {
  87. // Next src position to check
  88. nextS := s + (s-nextEmit)>>6 + 4
  89. if nextS > sLimit {
  90. goto emitRemainder
  91. }
  92. hash0 := hash6(cv, tableBits)
  93. hash1 := hash6(cv>>8, tableBits)
  94. candidate = int(table[hash0])
  95. candidate2 := int(table[hash1])
  96. table[hash0] = uint32(s)
  97. table[hash1] = uint32(s + 1)
  98. hash2 := hash6(cv>>16, tableBits)
  99. // Check repeat at offset checkRep.
  100. const checkRep = 1
  101. if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
  102. base := s + checkRep
  103. // Extend back
  104. for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
  105. i--
  106. base--
  107. }
  108. // Bail if we exceed the maximum size.
  109. if d+(base-nextEmit) > dstLimit {
  110. return 0
  111. }
  112. d += emitLiteral(dst[d:], src[nextEmit:base])
  113. // Extend forward
  114. candidate := s - repeat + 4 + checkRep
  115. s += 4 + checkRep
  116. for s <= sLimit {
  117. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  118. s += bits.TrailingZeros64(diff) >> 3
  119. break
  120. }
  121. s += 8
  122. candidate += 8
  123. }
  124. if debug {
  125. // Validate match.
  126. if s <= candidate {
  127. panic("s <= candidate")
  128. }
  129. a := src[base:s]
  130. b := src[base-repeat : base-repeat+(s-base)]
  131. if !bytes.Equal(a, b) {
  132. panic("mismatch")
  133. }
  134. }
  135. if nextEmit > 0 {
  136. // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
  137. d += emitRepeat(dst[d:], repeat, s-base)
  138. } else {
  139. // First match, cannot be repeat.
  140. d += emitCopy(dst[d:], repeat, s-base)
  141. }
  142. nextEmit = s
  143. if s >= sLimit {
  144. goto emitRemainder
  145. }
  146. cv = load64(src, s)
  147. continue
  148. }
  149. if uint32(cv) == load32(src, candidate) {
  150. break
  151. }
  152. candidate = int(table[hash2])
  153. if uint32(cv>>8) == load32(src, candidate2) {
  154. table[hash2] = uint32(s + 2)
  155. candidate = candidate2
  156. s++
  157. break
  158. }
  159. table[hash2] = uint32(s + 2)
  160. if uint32(cv>>16) == load32(src, candidate) {
  161. s += 2
  162. break
  163. }
  164. cv = load64(src, nextS)
  165. s = nextS
  166. }
  167. // Extend backwards.
  168. // The top bytes will be rechecked to get the full match.
  169. for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
  170. candidate--
  171. s--
  172. }
  173. // Bail if we exceed the maximum size.
  174. if d+(s-nextEmit) > dstLimit {
  175. return 0
  176. }
  177. // A 4-byte match has been found. We'll later see if more than 4 bytes
  178. // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
  179. // them as literal bytes.
  180. d += emitLiteral(dst[d:], src[nextEmit:s])
  181. // Call emitCopy, and then see if another emitCopy could be our next
  182. // move. Repeat until we find no match for the input immediately after
  183. // what was consumed by the last emitCopy call.
  184. //
  185. // If we exit this loop normally then we need to call emitLiteral next,
  186. // though we don't yet know how big the literal will be. We handle that
  187. // by proceeding to the next iteration of the main loop. We also can
  188. // exit this loop via goto if we get close to exhausting the input.
  189. for {
  190. // Invariant: we have a 4-byte match at s, and no need to emit any
  191. // literal bytes prior to s.
  192. base := s
  193. repeat = base - candidate
  194. // Extend the 4-byte match as long as possible.
  195. s += 4
  196. candidate += 4
  197. for s <= len(src)-8 {
  198. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  199. s += bits.TrailingZeros64(diff) >> 3
  200. break
  201. }
  202. s += 8
  203. candidate += 8
  204. }
  205. d += emitCopy(dst[d:], repeat, s-base)
  206. if debug {
  207. // Validate match.
  208. if s <= candidate {
  209. panic("s <= candidate")
  210. }
  211. a := src[base:s]
  212. b := src[base-repeat : base-repeat+(s-base)]
  213. if !bytes.Equal(a, b) {
  214. panic("mismatch")
  215. }
  216. }
  217. nextEmit = s
  218. if s >= sLimit {
  219. goto emitRemainder
  220. }
  221. if d > dstLimit {
  222. // Do we have space for more, if not bail.
  223. return 0
  224. }
  225. // Check for an immediate match, otherwise start search at s+1
  226. x := load64(src, s-2)
  227. m2Hash := hash6(x, tableBits)
  228. currHash := hash6(x>>16, tableBits)
  229. candidate = int(table[currHash])
  230. table[m2Hash] = uint32(s - 2)
  231. table[currHash] = uint32(s)
  232. if debug && s == candidate {
  233. panic("s == candidate")
  234. }
  235. if uint32(x>>16) != load32(src, candidate) {
  236. cv = load64(src, s+1)
  237. s++
  238. break
  239. }
  240. }
  241. }
  242. emitRemainder:
  243. if nextEmit < len(src) {
  244. // Bail if we exceed the maximum size.
  245. if d+len(src)-nextEmit > dstLimit {
  246. return 0
  247. }
  248. d += emitLiteral(dst[d:], src[nextEmit:])
  249. }
  250. return d
  251. }
  252. // encodeBlockGo64K is a specialized version for compressing blocks <= 64KB
  253. func encodeBlockGo64K(dst, src []byte) (d int) {
  254. // Initialize the hash table.
  255. const (
  256. tableBits = 14
  257. maxTableSize = 1 << tableBits
  258. debug = false
  259. )
  260. var table [maxTableSize]uint16
  261. // sLimit is when to stop looking for offset/length copies. The inputMargin
  262. // lets us use a fast path for emitLiteral in the main loop, while we are
  263. // looking for copies.
  264. sLimit := len(src) - inputMargin
  265. // Bail if we can't compress to at least this.
  266. dstLimit := len(src) - len(src)>>5 - 5
  267. // nextEmit is where in src the next emitLiteral should start from.
  268. nextEmit := 0
  269. // The encoded form must start with a literal, as there are no previous
  270. // bytes to copy, so we start looking for hash matches at s == 1.
  271. s := 1
  272. cv := load64(src, s)
  273. // We search for a repeat at -1, but don't output repeats when nextEmit == 0
  274. repeat := 1
  275. for {
  276. candidate := 0
  277. for {
  278. // Next src position to check
  279. nextS := s + (s-nextEmit)>>5 + 4
  280. if nextS > sLimit {
  281. goto emitRemainder
  282. }
  283. hash0 := hash6(cv, tableBits)
  284. hash1 := hash6(cv>>8, tableBits)
  285. candidate = int(table[hash0])
  286. candidate2 := int(table[hash1])
  287. table[hash0] = uint16(s)
  288. table[hash1] = uint16(s + 1)
  289. hash2 := hash6(cv>>16, tableBits)
  290. // Check repeat at offset checkRep.
  291. const checkRep = 1
  292. if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
  293. base := s + checkRep
  294. // Extend back
  295. for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
  296. i--
  297. base--
  298. }
  299. // Bail if we exceed the maximum size.
  300. if d+(base-nextEmit) > dstLimit {
  301. return 0
  302. }
  303. d += emitLiteral(dst[d:], src[nextEmit:base])
  304. // Extend forward
  305. candidate := s - repeat + 4 + checkRep
  306. s += 4 + checkRep
  307. for s <= sLimit {
  308. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  309. s += bits.TrailingZeros64(diff) >> 3
  310. break
  311. }
  312. s += 8
  313. candidate += 8
  314. }
  315. if debug {
  316. // Validate match.
  317. if s <= candidate {
  318. panic("s <= candidate")
  319. }
  320. a := src[base:s]
  321. b := src[base-repeat : base-repeat+(s-base)]
  322. if !bytes.Equal(a, b) {
  323. panic("mismatch")
  324. }
  325. }
  326. if nextEmit > 0 {
  327. // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
  328. d += emitRepeat(dst[d:], repeat, s-base)
  329. } else {
  330. // First match, cannot be repeat.
  331. d += emitCopy(dst[d:], repeat, s-base)
  332. }
  333. nextEmit = s
  334. if s >= sLimit {
  335. goto emitRemainder
  336. }
  337. cv = load64(src, s)
  338. continue
  339. }
  340. if uint32(cv) == load32(src, candidate) {
  341. break
  342. }
  343. candidate = int(table[hash2])
  344. if uint32(cv>>8) == load32(src, candidate2) {
  345. table[hash2] = uint16(s + 2)
  346. candidate = candidate2
  347. s++
  348. break
  349. }
  350. table[hash2] = uint16(s + 2)
  351. if uint32(cv>>16) == load32(src, candidate) {
  352. s += 2
  353. break
  354. }
  355. cv = load64(src, nextS)
  356. s = nextS
  357. }
  358. // Extend backwards.
  359. // The top bytes will be rechecked to get the full match.
  360. for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
  361. candidate--
  362. s--
  363. }
  364. // Bail if we exceed the maximum size.
  365. if d+(s-nextEmit) > dstLimit {
  366. return 0
  367. }
  368. // A 4-byte match has been found. We'll later see if more than 4 bytes
  369. // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
  370. // them as literal bytes.
  371. d += emitLiteral(dst[d:], src[nextEmit:s])
  372. // Call emitCopy, and then see if another emitCopy could be our next
  373. // move. Repeat until we find no match for the input immediately after
  374. // what was consumed by the last emitCopy call.
  375. //
  376. // If we exit this loop normally then we need to call emitLiteral next,
  377. // though we don't yet know how big the literal will be. We handle that
  378. // by proceeding to the next iteration of the main loop. We also can
  379. // exit this loop via goto if we get close to exhausting the input.
  380. for {
  381. // Invariant: we have a 4-byte match at s, and no need to emit any
  382. // literal bytes prior to s.
  383. base := s
  384. repeat = base - candidate
  385. // Extend the 4-byte match as long as possible.
  386. s += 4
  387. candidate += 4
  388. for s <= len(src)-8 {
  389. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  390. s += bits.TrailingZeros64(diff) >> 3
  391. break
  392. }
  393. s += 8
  394. candidate += 8
  395. }
  396. d += emitCopy(dst[d:], repeat, s-base)
  397. if debug {
  398. // Validate match.
  399. if s <= candidate {
  400. panic("s <= candidate")
  401. }
  402. a := src[base:s]
  403. b := src[base-repeat : base-repeat+(s-base)]
  404. if !bytes.Equal(a, b) {
  405. panic("mismatch")
  406. }
  407. }
  408. nextEmit = s
  409. if s >= sLimit {
  410. goto emitRemainder
  411. }
  412. if d > dstLimit {
  413. // Do we have space for more, if not bail.
  414. return 0
  415. }
  416. // Check for an immediate match, otherwise start search at s+1
  417. x := load64(src, s-2)
  418. m2Hash := hash6(x, tableBits)
  419. currHash := hash6(x>>16, tableBits)
  420. candidate = int(table[currHash])
  421. table[m2Hash] = uint16(s - 2)
  422. table[currHash] = uint16(s)
  423. if debug && s == candidate {
  424. panic("s == candidate")
  425. }
  426. if uint32(x>>16) != load32(src, candidate) {
  427. cv = load64(src, s+1)
  428. s++
  429. break
  430. }
  431. }
  432. }
  433. emitRemainder:
  434. if nextEmit < len(src) {
  435. // Bail if we exceed the maximum size.
  436. if d+len(src)-nextEmit > dstLimit {
  437. return 0
  438. }
  439. d += emitLiteral(dst[d:], src[nextEmit:])
  440. }
  441. return d
  442. }
  443. func encodeBlockSnappyGo(dst, src []byte) (d int) {
  444. // Initialize the hash table.
  445. const (
  446. tableBits = 14
  447. maxTableSize = 1 << tableBits
  448. )
  449. var table [maxTableSize]uint32
  450. // sLimit is when to stop looking for offset/length copies. The inputMargin
  451. // lets us use a fast path for emitLiteral in the main loop, while we are
  452. // looking for copies.
  453. sLimit := len(src) - inputMargin
  454. // Bail if we can't compress to at least this.
  455. dstLimit := len(src) - len(src)>>5 - 5
  456. // nextEmit is where in src the next emitLiteral should start from.
  457. nextEmit := 0
  458. // The encoded form must start with a literal, as there are no previous
  459. // bytes to copy, so we start looking for hash matches at s == 1.
  460. s := 1
  461. cv := load64(src, s)
  462. // We search for a repeat at -1, but don't output repeats when nextEmit == 0
  463. repeat := 1
  464. for {
  465. candidate := 0
  466. for {
  467. // Next src position to check
  468. nextS := s + (s-nextEmit)>>6 + 4
  469. if nextS > sLimit {
  470. goto emitRemainder
  471. }
  472. hash0 := hash6(cv, tableBits)
  473. hash1 := hash6(cv>>8, tableBits)
  474. candidate = int(table[hash0])
  475. candidate2 := int(table[hash1])
  476. table[hash0] = uint32(s)
  477. table[hash1] = uint32(s + 1)
  478. hash2 := hash6(cv>>16, tableBits)
  479. // Check repeat at offset checkRep.
  480. const checkRep = 1
  481. if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
  482. base := s + checkRep
  483. // Extend back
  484. for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
  485. i--
  486. base--
  487. }
  488. // Bail if we exceed the maximum size.
  489. if d+(base-nextEmit) > dstLimit {
  490. return 0
  491. }
  492. d += emitLiteral(dst[d:], src[nextEmit:base])
  493. // Extend forward
  494. candidate := s - repeat + 4 + checkRep
  495. s += 4 + checkRep
  496. for s <= sLimit {
  497. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  498. s += bits.TrailingZeros64(diff) >> 3
  499. break
  500. }
  501. s += 8
  502. candidate += 8
  503. }
  504. d += emitCopyNoRepeat(dst[d:], repeat, s-base)
  505. nextEmit = s
  506. if s >= sLimit {
  507. goto emitRemainder
  508. }
  509. cv = load64(src, s)
  510. continue
  511. }
  512. if uint32(cv) == load32(src, candidate) {
  513. break
  514. }
  515. candidate = int(table[hash2])
  516. if uint32(cv>>8) == load32(src, candidate2) {
  517. table[hash2] = uint32(s + 2)
  518. candidate = candidate2
  519. s++
  520. break
  521. }
  522. table[hash2] = uint32(s + 2)
  523. if uint32(cv>>16) == load32(src, candidate) {
  524. s += 2
  525. break
  526. }
  527. cv = load64(src, nextS)
  528. s = nextS
  529. }
  530. // Extend backwards
  531. for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
  532. candidate--
  533. s--
  534. }
  535. // Bail if we exceed the maximum size.
  536. if d+(s-nextEmit) > dstLimit {
  537. return 0
  538. }
  539. // A 4-byte match has been found. We'll later see if more than 4 bytes
  540. // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
  541. // them as literal bytes.
  542. d += emitLiteral(dst[d:], src[nextEmit:s])
  543. // Call emitCopy, and then see if another emitCopy could be our next
  544. // move. Repeat until we find no match for the input immediately after
  545. // what was consumed by the last emitCopy call.
  546. //
  547. // If we exit this loop normally then we need to call emitLiteral next,
  548. // though we don't yet know how big the literal will be. We handle that
  549. // by proceeding to the next iteration of the main loop. We also can
  550. // exit this loop via goto if we get close to exhausting the input.
  551. for {
  552. // Invariant: we have a 4-byte match at s, and no need to emit any
  553. // literal bytes prior to s.
  554. base := s
  555. repeat = base - candidate
  556. // Extend the 4-byte match as long as possible.
  557. s += 4
  558. candidate += 4
  559. for s <= len(src)-8 {
  560. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  561. s += bits.TrailingZeros64(diff) >> 3
  562. break
  563. }
  564. s += 8
  565. candidate += 8
  566. }
  567. d += emitCopyNoRepeat(dst[d:], repeat, s-base)
  568. if false {
  569. // Validate match.
  570. a := src[base:s]
  571. b := src[base-repeat : base-repeat+(s-base)]
  572. if !bytes.Equal(a, b) {
  573. panic("mismatch")
  574. }
  575. }
  576. nextEmit = s
  577. if s >= sLimit {
  578. goto emitRemainder
  579. }
  580. if d > dstLimit {
  581. // Do we have space for more, if not bail.
  582. return 0
  583. }
  584. // Check for an immediate match, otherwise start search at s+1
  585. x := load64(src, s-2)
  586. m2Hash := hash6(x, tableBits)
  587. currHash := hash6(x>>16, tableBits)
  588. candidate = int(table[currHash])
  589. table[m2Hash] = uint32(s - 2)
  590. table[currHash] = uint32(s)
  591. if uint32(x>>16) != load32(src, candidate) {
  592. cv = load64(src, s+1)
  593. s++
  594. break
  595. }
  596. }
  597. }
  598. emitRemainder:
  599. if nextEmit < len(src) {
  600. // Bail if we exceed the maximum size.
  601. if d+len(src)-nextEmit > dstLimit {
  602. return 0
  603. }
  604. d += emitLiteral(dst[d:], src[nextEmit:])
  605. }
  606. return d
  607. }
  608. // encodeBlockSnappyGo64K is a special version of encodeBlockSnappyGo for sizes <64KB
  609. func encodeBlockSnappyGo64K(dst, src []byte) (d int) {
  610. // Initialize the hash table.
  611. const (
  612. tableBits = 14
  613. maxTableSize = 1 << tableBits
  614. )
  615. var table [maxTableSize]uint16
  616. // sLimit is when to stop looking for offset/length copies. The inputMargin
  617. // lets us use a fast path for emitLiteral in the main loop, while we are
  618. // looking for copies.
  619. sLimit := len(src) - inputMargin
  620. // Bail if we can't compress to at least this.
  621. dstLimit := len(src) - len(src)>>5 - 5
  622. // nextEmit is where in src the next emitLiteral should start from.
  623. nextEmit := 0
  624. // The encoded form must start with a literal, as there are no previous
  625. // bytes to copy, so we start looking for hash matches at s == 1.
  626. s := 1
  627. cv := load64(src, s)
  628. // We search for a repeat at -1, but don't output repeats when nextEmit == 0
  629. repeat := 1
  630. for {
  631. candidate := 0
  632. for {
  633. // Next src position to check
  634. nextS := s + (s-nextEmit)>>5 + 4
  635. if nextS > sLimit {
  636. goto emitRemainder
  637. }
  638. hash0 := hash6(cv, tableBits)
  639. hash1 := hash6(cv>>8, tableBits)
  640. candidate = int(table[hash0])
  641. candidate2 := int(table[hash1])
  642. table[hash0] = uint16(s)
  643. table[hash1] = uint16(s + 1)
  644. hash2 := hash6(cv>>16, tableBits)
  645. // Check repeat at offset checkRep.
  646. const checkRep = 1
  647. if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
  648. base := s + checkRep
  649. // Extend back
  650. for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
  651. i--
  652. base--
  653. }
  654. // Bail if we exceed the maximum size.
  655. if d+(base-nextEmit) > dstLimit {
  656. return 0
  657. }
  658. d += emitLiteral(dst[d:], src[nextEmit:base])
  659. // Extend forward
  660. candidate := s - repeat + 4 + checkRep
  661. s += 4 + checkRep
  662. for s <= sLimit {
  663. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  664. s += bits.TrailingZeros64(diff) >> 3
  665. break
  666. }
  667. s += 8
  668. candidate += 8
  669. }
  670. d += emitCopyNoRepeat(dst[d:], repeat, s-base)
  671. nextEmit = s
  672. if s >= sLimit {
  673. goto emitRemainder
  674. }
  675. cv = load64(src, s)
  676. continue
  677. }
  678. if uint32(cv) == load32(src, candidate) {
  679. break
  680. }
  681. candidate = int(table[hash2])
  682. if uint32(cv>>8) == load32(src, candidate2) {
  683. table[hash2] = uint16(s + 2)
  684. candidate = candidate2
  685. s++
  686. break
  687. }
  688. table[hash2] = uint16(s + 2)
  689. if uint32(cv>>16) == load32(src, candidate) {
  690. s += 2
  691. break
  692. }
  693. cv = load64(src, nextS)
  694. s = nextS
  695. }
  696. // Extend backwards
  697. for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
  698. candidate--
  699. s--
  700. }
  701. // Bail if we exceed the maximum size.
  702. if d+(s-nextEmit) > dstLimit {
  703. return 0
  704. }
  705. // A 4-byte match has been found. We'll later see if more than 4 bytes
  706. // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
  707. // them as literal bytes.
  708. d += emitLiteral(dst[d:], src[nextEmit:s])
  709. // Call emitCopy, and then see if another emitCopy could be our next
  710. // move. Repeat until we find no match for the input immediately after
  711. // what was consumed by the last emitCopy call.
  712. //
  713. // If we exit this loop normally then we need to call emitLiteral next,
  714. // though we don't yet know how big the literal will be. We handle that
  715. // by proceeding to the next iteration of the main loop. We also can
  716. // exit this loop via goto if we get close to exhausting the input.
  717. for {
  718. // Invariant: we have a 4-byte match at s, and no need to emit any
  719. // literal bytes prior to s.
  720. base := s
  721. repeat = base - candidate
  722. // Extend the 4-byte match as long as possible.
  723. s += 4
  724. candidate += 4
  725. for s <= len(src)-8 {
  726. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  727. s += bits.TrailingZeros64(diff) >> 3
  728. break
  729. }
  730. s += 8
  731. candidate += 8
  732. }
  733. d += emitCopyNoRepeat(dst[d:], repeat, s-base)
  734. if false {
  735. // Validate match.
  736. a := src[base:s]
  737. b := src[base-repeat : base-repeat+(s-base)]
  738. if !bytes.Equal(a, b) {
  739. panic("mismatch")
  740. }
  741. }
  742. nextEmit = s
  743. if s >= sLimit {
  744. goto emitRemainder
  745. }
  746. if d > dstLimit {
  747. // Do we have space for more, if not bail.
  748. return 0
  749. }
  750. // Check for an immediate match, otherwise start search at s+1
  751. x := load64(src, s-2)
  752. m2Hash := hash6(x, tableBits)
  753. currHash := hash6(x>>16, tableBits)
  754. candidate = int(table[currHash])
  755. table[m2Hash] = uint16(s - 2)
  756. table[currHash] = uint16(s)
  757. if uint32(x>>16) != load32(src, candidate) {
  758. cv = load64(src, s+1)
  759. s++
  760. break
  761. }
  762. }
  763. }
  764. emitRemainder:
  765. if nextEmit < len(src) {
  766. // Bail if we exceed the maximum size.
  767. if d+len(src)-nextEmit > dstLimit {
  768. return 0
  769. }
  770. d += emitLiteral(dst[d:], src[nextEmit:])
  771. }
  772. return d
  773. }
  774. // encodeBlockGo encodes a non-empty src to a guaranteed-large-enough dst. It
  775. // assumes that the varint-encoded length of the decompressed bytes has already
  776. // been written.
  777. //
  778. // It also assumes that:
  779. //
  780. // len(dst) >= MaxEncodedLen(len(src)) &&
  781. // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
  782. func encodeBlockDictGo(dst, src []byte, dict *Dict) (d int) {
  783. // Initialize the hash table.
  784. const (
  785. tableBits = 14
  786. maxTableSize = 1 << tableBits
  787. maxAhead = 8 // maximum bytes ahead without checking sLimit
  788. debug = false
  789. )
  790. dict.initFast()
  791. var table [maxTableSize]uint32
  792. // sLimit is when to stop looking for offset/length copies. The inputMargin
  793. // lets us use a fast path for emitLiteral in the main loop, while we are
  794. // looking for copies.
  795. sLimit := len(src) - inputMargin
  796. if sLimit > MaxDictSrcOffset-maxAhead {
  797. sLimit = MaxDictSrcOffset - maxAhead
  798. }
  799. // Bail if we can't compress to at least this.
  800. dstLimit := len(src) - len(src)>>5 - 5
  801. // nextEmit is where in src the next emitLiteral should start from.
  802. nextEmit := 0
  803. // The encoded form can start with a dict entry (copy or repeat).
  804. s := 0
  805. // Convert dict repeat to offset
  806. repeat := len(dict.dict) - dict.repeat
  807. cv := load64(src, 0)
  808. // While in dict
  809. searchDict:
  810. for {
  811. // Next src position to check
  812. nextS := s + (s-nextEmit)>>6 + 4
  813. hash0 := hash6(cv, tableBits)
  814. hash1 := hash6(cv>>8, tableBits)
  815. if nextS > sLimit {
  816. if debug {
  817. fmt.Println("slimit reached", s, nextS)
  818. }
  819. break searchDict
  820. }
  821. candidateDict := int(dict.fastTable[hash0])
  822. candidateDict2 := int(dict.fastTable[hash1])
  823. candidate2 := int(table[hash1])
  824. candidate := int(table[hash0])
  825. table[hash0] = uint32(s)
  826. table[hash1] = uint32(s + 1)
  827. hash2 := hash6(cv>>16, tableBits)
  828. // Check repeat at offset checkRep.
  829. const checkRep = 1
  830. if repeat > s {
  831. candidate := len(dict.dict) - repeat + s
  832. if repeat-s >= 4 && uint32(cv) == load32(dict.dict, candidate) {
  833. // Extend back
  834. base := s
  835. for i := candidate; base > nextEmit && i > 0 && dict.dict[i-1] == src[base-1]; {
  836. i--
  837. base--
  838. }
  839. // Bail if we exceed the maximum size.
  840. if d+(base-nextEmit) > dstLimit {
  841. return 0
  842. }
  843. d += emitLiteral(dst[d:], src[nextEmit:base])
  844. if debug && nextEmit != base {
  845. fmt.Println("emitted ", base-nextEmit, "literals")
  846. }
  847. s += 4
  848. candidate += 4
  849. for candidate < len(dict.dict)-8 && s <= len(src)-8 {
  850. if diff := load64(src, s) ^ load64(dict.dict, candidate); diff != 0 {
  851. s += bits.TrailingZeros64(diff) >> 3
  852. break
  853. }
  854. s += 8
  855. candidate += 8
  856. }
  857. d += emitRepeat(dst[d:], repeat, s-base)
  858. if debug {
  859. fmt.Println("emitted dict repeat length", s-base, "offset:", repeat, "s:", s)
  860. }
  861. nextEmit = s
  862. if s >= sLimit {
  863. break searchDict
  864. }
  865. cv = load64(src, s)
  866. continue
  867. }
  868. } else if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
  869. base := s + checkRep
  870. // Extend back
  871. for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
  872. i--
  873. base--
  874. }
  875. d += emitLiteral(dst[d:], src[nextEmit:base])
  876. if debug && nextEmit != base {
  877. fmt.Println("emitted ", base-nextEmit, "literals")
  878. }
  879. // Extend forward
  880. candidate := s - repeat + 4 + checkRep
  881. s += 4 + checkRep
  882. for s <= sLimit {
  883. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  884. s += bits.TrailingZeros64(diff) >> 3
  885. break
  886. }
  887. s += 8
  888. candidate += 8
  889. }
  890. if debug {
  891. // Validate match.
  892. if s <= candidate {
  893. panic("s <= candidate")
  894. }
  895. a := src[base:s]
  896. b := src[base-repeat : base-repeat+(s-base)]
  897. if !bytes.Equal(a, b) {
  898. panic("mismatch")
  899. }
  900. }
  901. if nextEmit > 0 {
  902. // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
  903. d += emitRepeat(dst[d:], repeat, s-base)
  904. } else {
  905. // First match, cannot be repeat.
  906. d += emitCopy(dst[d:], repeat, s-base)
  907. }
  908. nextEmit = s
  909. if s >= sLimit {
  910. break searchDict
  911. }
  912. if debug {
  913. fmt.Println("emitted reg repeat", s-base, "s:", s)
  914. }
  915. cv = load64(src, s)
  916. continue searchDict
  917. }
  918. if s == 0 {
  919. cv = load64(src, nextS)
  920. s = nextS
  921. continue searchDict
  922. }
  923. // Start with table. These matches will always be closer.
  924. if uint32(cv) == load32(src, candidate) {
  925. goto emitMatch
  926. }
  927. candidate = int(table[hash2])
  928. if uint32(cv>>8) == load32(src, candidate2) {
  929. table[hash2] = uint32(s + 2)
  930. candidate = candidate2
  931. s++
  932. goto emitMatch
  933. }
  934. // Check dict. Dicts have longer offsets, so we want longer matches.
  935. if cv == load64(dict.dict, candidateDict) {
  936. table[hash2] = uint32(s + 2)
  937. goto emitDict
  938. }
  939. candidateDict = int(dict.fastTable[hash2])
  940. // Check if upper 7 bytes match
  941. if candidateDict2 >= 1 {
  942. if cv^load64(dict.dict, candidateDict2-1) < (1 << 8) {
  943. table[hash2] = uint32(s + 2)
  944. candidateDict = candidateDict2
  945. s++
  946. goto emitDict
  947. }
  948. }
  949. table[hash2] = uint32(s + 2)
  950. if uint32(cv>>16) == load32(src, candidate) {
  951. s += 2
  952. goto emitMatch
  953. }
  954. if candidateDict >= 2 {
  955. // Check if upper 6 bytes match
  956. if cv^load64(dict.dict, candidateDict-2) < (1 << 16) {
  957. s += 2
  958. goto emitDict
  959. }
  960. }
  961. cv = load64(src, nextS)
  962. s = nextS
  963. continue searchDict
  964. emitDict:
  965. {
  966. if debug {
  967. if load32(dict.dict, candidateDict) != load32(src, s) {
  968. panic("dict emit mismatch")
  969. }
  970. }
  971. // Extend backwards.
  972. // The top bytes will be rechecked to get the full match.
  973. for candidateDict > 0 && s > nextEmit && dict.dict[candidateDict-1] == src[s-1] {
  974. candidateDict--
  975. s--
  976. }
  977. // Bail if we exceed the maximum size.
  978. if d+(s-nextEmit) > dstLimit {
  979. return 0
  980. }
  981. // A 4-byte match has been found. We'll later see if more than 4 bytes
  982. // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
  983. // them as literal bytes.
  984. d += emitLiteral(dst[d:], src[nextEmit:s])
  985. if debug && nextEmit != s {
  986. fmt.Println("emitted ", s-nextEmit, "literals")
  987. }
  988. {
  989. // Invariant: we have a 4-byte match at s, and no need to emit any
  990. // literal bytes prior to s.
  991. base := s
  992. repeat = s + (len(dict.dict)) - candidateDict
  993. // Extend the 4-byte match as long as possible.
  994. s += 4
  995. candidateDict += 4
  996. for s <= len(src)-8 && len(dict.dict)-candidateDict >= 8 {
  997. if diff := load64(src, s) ^ load64(dict.dict, candidateDict); diff != 0 {
  998. s += bits.TrailingZeros64(diff) >> 3
  999. break
  1000. }
  1001. s += 8
  1002. candidateDict += 8
  1003. }
  1004. // Matches longer than 64 are split.
  1005. if s <= sLimit || s-base < 8 {
  1006. d += emitCopy(dst[d:], repeat, s-base)
  1007. } else {
  1008. // Split to ensure we don't start a copy within next block
  1009. d += emitCopy(dst[d:], repeat, 4)
  1010. d += emitRepeat(dst[d:], repeat, s-base-4)
  1011. }
  1012. if false {
  1013. // Validate match.
  1014. if s <= candidate {
  1015. panic("s <= candidate")
  1016. }
  1017. a := src[base:s]
  1018. b := dict.dict[base-repeat : base-repeat+(s-base)]
  1019. if !bytes.Equal(a, b) {
  1020. panic("mismatch")
  1021. }
  1022. }
  1023. if debug {
  1024. fmt.Println("emitted dict copy, length", s-base, "offset:", repeat, "s:", s)
  1025. }
  1026. nextEmit = s
  1027. if s >= sLimit {
  1028. break searchDict
  1029. }
  1030. if d > dstLimit {
  1031. // Do we have space for more, if not bail.
  1032. return 0
  1033. }
  1034. // Index and continue loop to try new candidate.
  1035. x := load64(src, s-2)
  1036. m2Hash := hash6(x, tableBits)
  1037. currHash := hash6(x>>8, tableBits)
  1038. table[m2Hash] = uint32(s - 2)
  1039. table[currHash] = uint32(s - 1)
  1040. cv = load64(src, s)
  1041. }
  1042. continue
  1043. }
  1044. emitMatch:
  1045. // Extend backwards.
  1046. // The top bytes will be rechecked to get the full match.
  1047. for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
  1048. candidate--
  1049. s--
  1050. }
  1051. // Bail if we exceed the maximum size.
  1052. if d+(s-nextEmit) > dstLimit {
  1053. return 0
  1054. }
  1055. // A 4-byte match has been found. We'll later see if more than 4 bytes
  1056. // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
  1057. // them as literal bytes.
  1058. d += emitLiteral(dst[d:], src[nextEmit:s])
  1059. if debug && nextEmit != s {
  1060. fmt.Println("emitted ", s-nextEmit, "literals")
  1061. }
  1062. // Call emitCopy, and then see if another emitCopy could be our next
  1063. // move. Repeat until we find no match for the input immediately after
  1064. // what was consumed by the last emitCopy call.
  1065. //
  1066. // If we exit this loop normally then we need to call emitLiteral next,
  1067. // though we don't yet know how big the literal will be. We handle that
  1068. // by proceeding to the next iteration of the main loop. We also can
  1069. // exit this loop via goto if we get close to exhausting the input.
  1070. for {
  1071. // Invariant: we have a 4-byte match at s, and no need to emit any
  1072. // literal bytes prior to s.
  1073. base := s
  1074. repeat = base - candidate
  1075. // Extend the 4-byte match as long as possible.
  1076. s += 4
  1077. candidate += 4
  1078. for s <= len(src)-8 {
  1079. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  1080. s += bits.TrailingZeros64(diff) >> 3
  1081. break
  1082. }
  1083. s += 8
  1084. candidate += 8
  1085. }
  1086. d += emitCopy(dst[d:], repeat, s-base)
  1087. if debug {
  1088. // Validate match.
  1089. if s <= candidate {
  1090. panic("s <= candidate")
  1091. }
  1092. a := src[base:s]
  1093. b := src[base-repeat : base-repeat+(s-base)]
  1094. if !bytes.Equal(a, b) {
  1095. panic("mismatch")
  1096. }
  1097. }
  1098. if debug {
  1099. fmt.Println("emitted src copy, length", s-base, "offset:", repeat, "s:", s)
  1100. }
  1101. nextEmit = s
  1102. if s >= sLimit {
  1103. break searchDict
  1104. }
  1105. if d > dstLimit {
  1106. // Do we have space for more, if not bail.
  1107. return 0
  1108. }
  1109. // Check for an immediate match, otherwise start search at s+1
  1110. x := load64(src, s-2)
  1111. m2Hash := hash6(x, tableBits)
  1112. currHash := hash6(x>>16, tableBits)
  1113. candidate = int(table[currHash])
  1114. table[m2Hash] = uint32(s - 2)
  1115. table[currHash] = uint32(s)
  1116. if debug && s == candidate {
  1117. panic("s == candidate")
  1118. }
  1119. if uint32(x>>16) != load32(src, candidate) {
  1120. cv = load64(src, s+1)
  1121. s++
  1122. break
  1123. }
  1124. }
  1125. }
  1126. // Search without dict:
  1127. if repeat > s {
  1128. repeat = 0
  1129. }
  1130. // No more dict
  1131. sLimit = len(src) - inputMargin
  1132. if s >= sLimit {
  1133. goto emitRemainder
  1134. }
  1135. if debug {
  1136. fmt.Println("non-dict matching at", s, "repeat:", repeat)
  1137. }
  1138. cv = load64(src, s)
  1139. if debug {
  1140. fmt.Println("now", s, "->", sLimit, "out:", d, "left:", len(src)-s, "nextemit:", nextEmit, "dstLimit:", dstLimit, "s:", s)
  1141. }
  1142. for {
  1143. candidate := 0
  1144. for {
  1145. // Next src position to check
  1146. nextS := s + (s-nextEmit)>>6 + 4
  1147. if nextS > sLimit {
  1148. goto emitRemainder
  1149. }
  1150. hash0 := hash6(cv, tableBits)
  1151. hash1 := hash6(cv>>8, tableBits)
  1152. candidate = int(table[hash0])
  1153. candidate2 := int(table[hash1])
  1154. table[hash0] = uint32(s)
  1155. table[hash1] = uint32(s + 1)
  1156. hash2 := hash6(cv>>16, tableBits)
  1157. // Check repeat at offset checkRep.
  1158. const checkRep = 1
  1159. if repeat > 0 && uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
  1160. base := s + checkRep
  1161. // Extend back
  1162. for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
  1163. i--
  1164. base--
  1165. }
  1166. // Bail if we exceed the maximum size.
  1167. if d+(base-nextEmit) > dstLimit {
  1168. return 0
  1169. }
  1170. d += emitLiteral(dst[d:], src[nextEmit:base])
  1171. if debug && nextEmit != base {
  1172. fmt.Println("emitted ", base-nextEmit, "literals")
  1173. }
  1174. // Extend forward
  1175. candidate := s - repeat + 4 + checkRep
  1176. s += 4 + checkRep
  1177. for s <= sLimit {
  1178. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  1179. s += bits.TrailingZeros64(diff) >> 3
  1180. break
  1181. }
  1182. s += 8
  1183. candidate += 8
  1184. }
  1185. if debug {
  1186. // Validate match.
  1187. if s <= candidate {
  1188. panic("s <= candidate")
  1189. }
  1190. a := src[base:s]
  1191. b := src[base-repeat : base-repeat+(s-base)]
  1192. if !bytes.Equal(a, b) {
  1193. panic("mismatch")
  1194. }
  1195. }
  1196. if nextEmit > 0 {
  1197. // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
  1198. d += emitRepeat(dst[d:], repeat, s-base)
  1199. } else {
  1200. // First match, cannot be repeat.
  1201. d += emitCopy(dst[d:], repeat, s-base)
  1202. }
  1203. if debug {
  1204. fmt.Println("emitted src repeat length", s-base, "offset:", repeat, "s:", s)
  1205. }
  1206. nextEmit = s
  1207. if s >= sLimit {
  1208. goto emitRemainder
  1209. }
  1210. cv = load64(src, s)
  1211. continue
  1212. }
  1213. if uint32(cv) == load32(src, candidate) {
  1214. break
  1215. }
  1216. candidate = int(table[hash2])
  1217. if uint32(cv>>8) == load32(src, candidate2) {
  1218. table[hash2] = uint32(s + 2)
  1219. candidate = candidate2
  1220. s++
  1221. break
  1222. }
  1223. table[hash2] = uint32(s + 2)
  1224. if uint32(cv>>16) == load32(src, candidate) {
  1225. s += 2
  1226. break
  1227. }
  1228. cv = load64(src, nextS)
  1229. s = nextS
  1230. }
  1231. // Extend backwards.
  1232. // The top bytes will be rechecked to get the full match.
  1233. for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
  1234. candidate--
  1235. s--
  1236. }
  1237. // Bail if we exceed the maximum size.
  1238. if d+(s-nextEmit) > dstLimit {
  1239. return 0
  1240. }
  1241. // A 4-byte match has been found. We'll later see if more than 4 bytes
  1242. // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
  1243. // them as literal bytes.
  1244. d += emitLiteral(dst[d:], src[nextEmit:s])
  1245. if debug && nextEmit != s {
  1246. fmt.Println("emitted ", s-nextEmit, "literals")
  1247. }
  1248. // Call emitCopy, and then see if another emitCopy could be our next
  1249. // move. Repeat until we find no match for the input immediately after
  1250. // what was consumed by the last emitCopy call.
  1251. //
  1252. // If we exit this loop normally then we need to call emitLiteral next,
  1253. // though we don't yet know how big the literal will be. We handle that
  1254. // by proceeding to the next iteration of the main loop. We also can
  1255. // exit this loop via goto if we get close to exhausting the input.
  1256. for {
  1257. // Invariant: we have a 4-byte match at s, and no need to emit any
  1258. // literal bytes prior to s.
  1259. base := s
  1260. repeat = base - candidate
  1261. // Extend the 4-byte match as long as possible.
  1262. s += 4
  1263. candidate += 4
  1264. for s <= len(src)-8 {
  1265. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  1266. s += bits.TrailingZeros64(diff) >> 3
  1267. break
  1268. }
  1269. s += 8
  1270. candidate += 8
  1271. }
  1272. d += emitCopy(dst[d:], repeat, s-base)
  1273. if debug {
  1274. // Validate match.
  1275. if s <= candidate {
  1276. panic("s <= candidate")
  1277. }
  1278. a := src[base:s]
  1279. b := src[base-repeat : base-repeat+(s-base)]
  1280. if !bytes.Equal(a, b) {
  1281. panic("mismatch")
  1282. }
  1283. }
  1284. if debug {
  1285. fmt.Println("emitted src copy, length", s-base, "offset:", repeat, "s:", s)
  1286. }
  1287. nextEmit = s
  1288. if s >= sLimit {
  1289. goto emitRemainder
  1290. }
  1291. if d > dstLimit {
  1292. // Do we have space for more, if not bail.
  1293. return 0
  1294. }
  1295. // Check for an immediate match, otherwise start search at s+1
  1296. x := load64(src, s-2)
  1297. m2Hash := hash6(x, tableBits)
  1298. currHash := hash6(x>>16, tableBits)
  1299. candidate = int(table[currHash])
  1300. table[m2Hash] = uint32(s - 2)
  1301. table[currHash] = uint32(s)
  1302. if debug && s == candidate {
  1303. panic("s == candidate")
  1304. }
  1305. if uint32(x>>16) != load32(src, candidate) {
  1306. cv = load64(src, s+1)
  1307. s++
  1308. break
  1309. }
  1310. }
  1311. }
  1312. emitRemainder:
  1313. if nextEmit < len(src) {
  1314. // Bail if we exceed the maximum size.
  1315. if d+len(src)-nextEmit > dstLimit {
  1316. return 0
  1317. }
  1318. d += emitLiteral(dst[d:], src[nextEmit:])
  1319. if debug && nextEmit != s {
  1320. fmt.Println("emitted ", len(src)-nextEmit, "literals")
  1321. }
  1322. }
  1323. return d
  1324. }