encode_all.go 37 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477
  1. // Copyright 2016 The Snappy-Go Authors. All rights reserved.
  2. // Copyright (c) 2019 Klaus Post. All rights reserved.
  3. // Use of this source code is governed by a BSD-style
  4. // license that can be found in the LICENSE file.
  5. package s2
  6. import (
  7. "bytes"
  8. "encoding/binary"
  9. "fmt"
  10. "math/bits"
  11. "github.com/klauspost/compress/internal/le"
  12. )
  13. func load32(b []byte, i int) uint32 {
  14. return le.Load32(b, i)
  15. }
  16. func load64(b []byte, i int) uint64 {
  17. return le.Load64(b, i)
  18. }
  19. // hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits.
  20. // Preferably h should be a constant and should always be <64.
  21. func hash6(u uint64, h uint8) uint32 {
  22. const prime6bytes = 227718039650203
  23. return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & 63))
  24. }
  25. func encodeGo(dst, src []byte) []byte {
  26. if n := MaxEncodedLen(len(src)); n < 0 {
  27. panic(ErrTooLarge)
  28. } else if len(dst) < n {
  29. dst = make([]byte, n)
  30. }
  31. // The block starts with the varint-encoded length of the decompressed bytes.
  32. d := binary.PutUvarint(dst, uint64(len(src)))
  33. if len(src) == 0 {
  34. return dst[:d]
  35. }
  36. if len(src) < minNonLiteralBlockSize {
  37. d += emitLiteral(dst[d:], src)
  38. return dst[:d]
  39. }
  40. var n int
  41. if len(src) < 64<<10 {
  42. n = encodeBlockGo64K(dst[d:], src)
  43. } else {
  44. n = encodeBlockGo(dst[d:], src)
  45. }
  46. if n > 0 {
  47. d += n
  48. return dst[:d]
  49. }
  50. // Not compressible
  51. d += emitLiteral(dst[d:], src)
  52. return dst[:d]
  53. }
  54. // encodeBlockGo encodes a non-empty src to a guaranteed-large-enough dst. It
  55. // assumes that the varint-encoded length of the decompressed bytes has already
  56. // been written.
  57. //
  58. // It also assumes that:
  59. //
  60. // len(dst) >= MaxEncodedLen(len(src)) &&
  61. // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
  62. func encodeBlockGo(dst, src []byte) (d int) {
  63. // Initialize the hash table.
  64. const (
  65. tableBits = 14
  66. maxTableSize = 1 << tableBits
  67. debug = false
  68. )
  69. var table [maxTableSize]uint32
  70. // sLimit is when to stop looking for offset/length copies. The inputMargin
  71. // lets us use a fast path for emitLiteral in the main loop, while we are
  72. // looking for copies.
  73. sLimit := len(src) - inputMargin
  74. // Bail if we can't compress to at least this.
  75. dstLimit := len(src) - len(src)>>5 - 5
  76. // nextEmit is where in src the next emitLiteral should start from.
  77. nextEmit := 0
  78. // The encoded form must start with a literal, as there are no previous
  79. // bytes to copy, so we start looking for hash matches at s == 1.
  80. s := 1
  81. cv := load64(src, s)
  82. // We search for a repeat at -1, but don't output repeats when nextEmit == 0
  83. repeat := 1
  84. for {
  85. candidate := 0
  86. for {
  87. // Next src position to check
  88. nextS := s + (s-nextEmit)>>6 + 4
  89. if nextS > sLimit {
  90. goto emitRemainder
  91. }
  92. hash0 := hash6(cv, tableBits)
  93. hash1 := hash6(cv>>8, tableBits)
  94. candidate = int(table[hash0])
  95. candidate2 := int(table[hash1])
  96. table[hash0] = uint32(s)
  97. table[hash1] = uint32(s + 1)
  98. hash2 := hash6(cv>>16, tableBits)
  99. // Check repeat at offset checkRep.
  100. const checkRep = 1
  101. if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
  102. base := s + checkRep
  103. // Extend back
  104. for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
  105. i--
  106. base--
  107. }
  108. // Bail if we exceed the maximum size.
  109. if d+(base-nextEmit) > dstLimit {
  110. return 0
  111. }
  112. d += emitLiteral(dst[d:], src[nextEmit:base])
  113. // Extend forward
  114. candidate := s - repeat + 4 + checkRep
  115. s += 4 + checkRep
  116. for s <= sLimit {
  117. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  118. s += bits.TrailingZeros64(diff) >> 3
  119. break
  120. }
  121. s += 8
  122. candidate += 8
  123. }
  124. if debug {
  125. // Validate match.
  126. if s <= candidate {
  127. panic("s <= candidate")
  128. }
  129. a := src[base:s]
  130. b := src[base-repeat : base-repeat+(s-base)]
  131. if !bytes.Equal(a, b) {
  132. panic("mismatch")
  133. }
  134. }
  135. if nextEmit > 0 {
  136. // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
  137. d += emitRepeat(dst[d:], repeat, s-base)
  138. } else {
  139. // First match, cannot be repeat.
  140. d += emitCopy(dst[d:], repeat, s-base)
  141. }
  142. nextEmit = s
  143. if s >= sLimit {
  144. goto emitRemainder
  145. }
  146. cv = load64(src, s)
  147. continue
  148. }
  149. if uint32(cv) == load32(src, candidate) {
  150. break
  151. }
  152. candidate = int(table[hash2])
  153. if uint32(cv>>8) == load32(src, candidate2) {
  154. table[hash2] = uint32(s + 2)
  155. candidate = candidate2
  156. s++
  157. break
  158. }
  159. table[hash2] = uint32(s + 2)
  160. if uint32(cv>>16) == load32(src, candidate) {
  161. s += 2
  162. break
  163. }
  164. cv = load64(src, nextS)
  165. s = nextS
  166. }
  167. // Extend backwards.
  168. // The top bytes will be rechecked to get the full match.
  169. for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
  170. candidate--
  171. s--
  172. }
  173. // Bail if we exceed the maximum size.
  174. if d+(s-nextEmit) > dstLimit {
  175. return 0
  176. }
  177. // A 4-byte match has been found. We'll later see if more than 4 bytes
  178. // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
  179. // them as literal bytes.
  180. d += emitLiteral(dst[d:], src[nextEmit:s])
  181. // Call emitCopy, and then see if another emitCopy could be our next
  182. // move. Repeat until we find no match for the input immediately after
  183. // what was consumed by the last emitCopy call.
  184. //
  185. // If we exit this loop normally then we need to call emitLiteral next,
  186. // though we don't yet know how big the literal will be. We handle that
  187. // by proceeding to the next iteration of the main loop. We also can
  188. // exit this loop via goto if we get close to exhausting the input.
  189. for {
  190. // Invariant: we have a 4-byte match at s, and no need to emit any
  191. // literal bytes prior to s.
  192. base := s
  193. repeat = base - candidate
  194. // Extend the 4-byte match as long as possible.
  195. s += 4
  196. candidate += 4
  197. for s <= len(src)-8 {
  198. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  199. s += bits.TrailingZeros64(diff) >> 3
  200. break
  201. }
  202. s += 8
  203. candidate += 8
  204. }
  205. d += emitCopy(dst[d:], repeat, s-base)
  206. if debug {
  207. // Validate match.
  208. if s <= candidate {
  209. panic("s <= candidate")
  210. }
  211. a := src[base:s]
  212. b := src[base-repeat : base-repeat+(s-base)]
  213. if !bytes.Equal(a, b) {
  214. panic("mismatch")
  215. }
  216. }
  217. nextEmit = s
  218. if s >= sLimit {
  219. goto emitRemainder
  220. }
  221. if d > dstLimit {
  222. // Do we have space for more, if not bail.
  223. return 0
  224. }
  225. // Check for an immediate match, otherwise start search at s+1
  226. x := load64(src, s-2)
  227. m2Hash := hash6(x, tableBits)
  228. currHash := hash6(x>>16, tableBits)
  229. candidate = int(table[currHash])
  230. table[m2Hash] = uint32(s - 2)
  231. table[currHash] = uint32(s)
  232. if debug && s == candidate {
  233. panic("s == candidate")
  234. }
  235. if uint32(x>>16) != load32(src, candidate) {
  236. cv = load64(src, s+1)
  237. s++
  238. break
  239. }
  240. }
  241. }
  242. emitRemainder:
  243. if nextEmit < len(src) {
  244. // Bail if we exceed the maximum size.
  245. if d+len(src)-nextEmit > dstLimit {
  246. return 0
  247. }
  248. d += emitLiteral(dst[d:], src[nextEmit:])
  249. }
  250. return d
  251. }
  252. // encodeBlockGo64K is a specialized version for compressing blocks <= 64KB
  253. func encodeBlockGo64K(dst, src []byte) (d int) {
  254. // Initialize the hash table.
  255. const (
  256. tableBits = 14
  257. maxTableSize = 1 << tableBits
  258. debug = false
  259. )
  260. var table [maxTableSize]uint16
  261. // sLimit is when to stop looking for offset/length copies. The inputMargin
  262. // lets us use a fast path for emitLiteral in the main loop, while we are
  263. // looking for copies.
  264. sLimit := len(src) - inputMargin
  265. // Bail if we can't compress to at least this.
  266. dstLimit := len(src) - len(src)>>5 - 5
  267. // nextEmit is where in src the next emitLiteral should start from.
  268. nextEmit := 0
  269. // The encoded form must start with a literal, as there are no previous
  270. // bytes to copy, so we start looking for hash matches at s == 1.
  271. s := 1
  272. cv := load64(src, s)
  273. // We search for a repeat at -1, but don't output repeats when nextEmit == 0
  274. repeat := 1
  275. for {
  276. candidate := 0
  277. for {
  278. // Next src position to check
  279. nextS := s + (s-nextEmit)>>5 + 4
  280. if nextS > sLimit {
  281. goto emitRemainder
  282. }
  283. hash0 := hash6(cv, tableBits)
  284. hash1 := hash6(cv>>8, tableBits)
  285. candidate = int(table[hash0])
  286. candidate2 := int(table[hash1])
  287. table[hash0] = uint16(s)
  288. table[hash1] = uint16(s + 1)
  289. hash2 := hash6(cv>>16, tableBits)
  290. // Check repeat at offset checkRep.
  291. const checkRep = 1
  292. if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
  293. base := s + checkRep
  294. // Extend back
  295. for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
  296. i--
  297. base--
  298. }
  299. // Bail if we exceed the maximum size.
  300. if d+(base-nextEmit) > dstLimit {
  301. return 0
  302. }
  303. d += emitLiteral(dst[d:], src[nextEmit:base])
  304. // Extend forward
  305. candidate := s - repeat + 4 + checkRep
  306. s += 4 + checkRep
  307. for s <= sLimit {
  308. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  309. s += bits.TrailingZeros64(diff) >> 3
  310. break
  311. }
  312. s += 8
  313. candidate += 8
  314. }
  315. if debug {
  316. // Validate match.
  317. if s <= candidate {
  318. panic("s <= candidate")
  319. }
  320. a := src[base:s]
  321. b := src[base-repeat : base-repeat+(s-base)]
  322. if !bytes.Equal(a, b) {
  323. panic("mismatch")
  324. }
  325. }
  326. if nextEmit > 0 {
  327. // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
  328. d += emitRepeat(dst[d:], repeat, s-base)
  329. } else {
  330. // First match, cannot be repeat.
  331. d += emitCopy(dst[d:], repeat, s-base)
  332. }
  333. nextEmit = s
  334. if s >= sLimit {
  335. goto emitRemainder
  336. }
  337. cv = load64(src, s)
  338. continue
  339. }
  340. if uint32(cv) == load32(src, candidate) {
  341. break
  342. }
  343. candidate = int(table[hash2])
  344. if uint32(cv>>8) == load32(src, candidate2) {
  345. table[hash2] = uint16(s + 2)
  346. candidate = candidate2
  347. s++
  348. break
  349. }
  350. table[hash2] = uint16(s + 2)
  351. if uint32(cv>>16) == load32(src, candidate) {
  352. s += 2
  353. break
  354. }
  355. cv = load64(src, nextS)
  356. s = nextS
  357. }
  358. // Extend backwards.
  359. // The top bytes will be rechecked to get the full match.
  360. for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
  361. candidate--
  362. s--
  363. }
  364. // Bail if we exceed the maximum size.
  365. if d+(s-nextEmit) > dstLimit {
  366. return 0
  367. }
  368. // A 4-byte match has been found. We'll later see if more than 4 bytes
  369. // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
  370. // them as literal bytes.
  371. d += emitLiteral(dst[d:], src[nextEmit:s])
  372. // Call emitCopy, and then see if another emitCopy could be our next
  373. // move. Repeat until we find no match for the input immediately after
  374. // what was consumed by the last emitCopy call.
  375. //
  376. // If we exit this loop normally then we need to call emitLiteral next,
  377. // though we don't yet know how big the literal will be. We handle that
  378. // by proceeding to the next iteration of the main loop. We also can
  379. // exit this loop via goto if we get close to exhausting the input.
  380. for {
  381. // Invariant: we have a 4-byte match at s, and no need to emit any
  382. // literal bytes prior to s.
  383. base := s
  384. repeat = base - candidate
  385. // Extend the 4-byte match as long as possible.
  386. s += 4
  387. candidate += 4
  388. for s <= len(src)-8 {
  389. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  390. s += bits.TrailingZeros64(diff) >> 3
  391. break
  392. }
  393. s += 8
  394. candidate += 8
  395. }
  396. d += emitCopy(dst[d:], repeat, s-base)
  397. if debug {
  398. // Validate match.
  399. if s <= candidate {
  400. panic("s <= candidate")
  401. }
  402. a := src[base:s]
  403. b := src[base-repeat : base-repeat+(s-base)]
  404. if !bytes.Equal(a, b) {
  405. panic("mismatch")
  406. }
  407. }
  408. nextEmit = s
  409. if s >= sLimit {
  410. goto emitRemainder
  411. }
  412. if d > dstLimit {
  413. // Do we have space for more, if not bail.
  414. return 0
  415. }
  416. // Check for an immediate match, otherwise start search at s+1
  417. x := load64(src, s-2)
  418. m2Hash := hash6(x, tableBits)
  419. currHash := hash6(x>>16, tableBits)
  420. candidate = int(table[currHash])
  421. table[m2Hash] = uint16(s - 2)
  422. table[currHash] = uint16(s)
  423. if debug && s == candidate {
  424. panic("s == candidate")
  425. }
  426. if uint32(x>>16) != load32(src, candidate) {
  427. cv = load64(src, s+1)
  428. s++
  429. break
  430. }
  431. }
  432. }
  433. emitRemainder:
  434. if nextEmit < len(src) {
  435. // Bail if we exceed the maximum size.
  436. if d+len(src)-nextEmit > dstLimit {
  437. return 0
  438. }
  439. d += emitLiteral(dst[d:], src[nextEmit:])
  440. }
  441. return d
  442. }
  443. func encodeBlockSnappyGo(dst, src []byte) (d int) {
  444. // Initialize the hash table.
  445. const (
  446. tableBits = 14
  447. maxTableSize = 1 << tableBits
  448. )
  449. var table [maxTableSize]uint32
  450. // sLimit is when to stop looking for offset/length copies. The inputMargin
  451. // lets us use a fast path for emitLiteral in the main loop, while we are
  452. // looking for copies.
  453. sLimit := len(src) - inputMargin
  454. // Bail if we can't compress to at least this.
  455. dstLimit := len(src) - len(src)>>5 - 5
  456. // nextEmit is where in src the next emitLiteral should start from.
  457. nextEmit := 0
  458. // The encoded form must start with a literal, as there are no previous
  459. // bytes to copy, so we start looking for hash matches at s == 1.
  460. s := 1
  461. cv := load64(src, s)
  462. // We search for a repeat at -1, but don't output repeats when nextEmit == 0
  463. repeat := 1
  464. for {
  465. candidate := 0
  466. for {
  467. // Next src position to check
  468. nextS := s + (s-nextEmit)>>6 + 4
  469. if nextS > sLimit {
  470. goto emitRemainder
  471. }
  472. hash0 := hash6(cv, tableBits)
  473. hash1 := hash6(cv>>8, tableBits)
  474. candidate = int(table[hash0])
  475. candidate2 := int(table[hash1])
  476. table[hash0] = uint32(s)
  477. table[hash1] = uint32(s + 1)
  478. hash2 := hash6(cv>>16, tableBits)
  479. // Check repeat at offset checkRep.
  480. const checkRep = 1
  481. if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
  482. base := s + checkRep
  483. // Extend back
  484. for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
  485. i--
  486. base--
  487. }
  488. // Bail if we exceed the maximum size.
  489. if d+(base-nextEmit) > dstLimit {
  490. return 0
  491. }
  492. d += emitLiteral(dst[d:], src[nextEmit:base])
  493. // Extend forward
  494. candidate := s - repeat + 4 + checkRep
  495. s += 4 + checkRep
  496. for s <= sLimit {
  497. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  498. s += bits.TrailingZeros64(diff) >> 3
  499. break
  500. }
  501. s += 8
  502. candidate += 8
  503. }
  504. d += emitCopyNoRepeat(dst[d:], repeat, s-base)
  505. nextEmit = s
  506. if s >= sLimit {
  507. goto emitRemainder
  508. }
  509. cv = load64(src, s)
  510. continue
  511. }
  512. if uint32(cv) == load32(src, candidate) {
  513. break
  514. }
  515. candidate = int(table[hash2])
  516. if uint32(cv>>8) == load32(src, candidate2) {
  517. table[hash2] = uint32(s + 2)
  518. candidate = candidate2
  519. s++
  520. break
  521. }
  522. table[hash2] = uint32(s + 2)
  523. if uint32(cv>>16) == load32(src, candidate) {
  524. s += 2
  525. break
  526. }
  527. cv = load64(src, nextS)
  528. s = nextS
  529. }
  530. // Extend backwards
  531. for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
  532. candidate--
  533. s--
  534. }
  535. // Bail if we exceed the maximum size.
  536. if d+(s-nextEmit) > dstLimit {
  537. return 0
  538. }
  539. // A 4-byte match has been found. We'll later see if more than 4 bytes
  540. // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
  541. // them as literal bytes.
  542. d += emitLiteral(dst[d:], src[nextEmit:s])
  543. // Call emitCopy, and then see if another emitCopy could be our next
  544. // move. Repeat until we find no match for the input immediately after
  545. // what was consumed by the last emitCopy call.
  546. //
  547. // If we exit this loop normally then we need to call emitLiteral next,
  548. // though we don't yet know how big the literal will be. We handle that
  549. // by proceeding to the next iteration of the main loop. We also can
  550. // exit this loop via goto if we get close to exhausting the input.
  551. for {
  552. // Invariant: we have a 4-byte match at s, and no need to emit any
  553. // literal bytes prior to s.
  554. base := s
  555. repeat = base - candidate
  556. // Extend the 4-byte match as long as possible.
  557. s += 4
  558. candidate += 4
  559. for s <= len(src)-8 {
  560. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  561. s += bits.TrailingZeros64(diff) >> 3
  562. break
  563. }
  564. s += 8
  565. candidate += 8
  566. }
  567. d += emitCopyNoRepeat(dst[d:], repeat, s-base)
  568. if false {
  569. // Validate match.
  570. a := src[base:s]
  571. b := src[base-repeat : base-repeat+(s-base)]
  572. if !bytes.Equal(a, b) {
  573. panic("mismatch")
  574. }
  575. }
  576. nextEmit = s
  577. if s >= sLimit {
  578. goto emitRemainder
  579. }
  580. if d > dstLimit {
  581. // Do we have space for more, if not bail.
  582. return 0
  583. }
  584. // Check for an immediate match, otherwise start search at s+1
  585. x := load64(src, s-2)
  586. m2Hash := hash6(x, tableBits)
  587. currHash := hash6(x>>16, tableBits)
  588. candidate = int(table[currHash])
  589. table[m2Hash] = uint32(s - 2)
  590. table[currHash] = uint32(s)
  591. if uint32(x>>16) != load32(src, candidate) {
  592. cv = load64(src, s+1)
  593. s++
  594. break
  595. }
  596. }
  597. }
  598. emitRemainder:
  599. if nextEmit < len(src) {
  600. // Bail if we exceed the maximum size.
  601. if d+len(src)-nextEmit > dstLimit {
  602. return 0
  603. }
  604. d += emitLiteral(dst[d:], src[nextEmit:])
  605. }
  606. return d
  607. }
  608. // encodeBlockSnappyGo64K is a special version of encodeBlockSnappyGo for sizes <64KB
  609. func encodeBlockSnappyGo64K(dst, src []byte) (d int) {
  610. // Initialize the hash table.
  611. const (
  612. tableBits = 14
  613. maxTableSize = 1 << tableBits
  614. )
  615. var table [maxTableSize]uint16
  616. // sLimit is when to stop looking for offset/length copies. The inputMargin
  617. // lets us use a fast path for emitLiteral in the main loop, while we are
  618. // looking for copies.
  619. sLimit := len(src) - inputMargin
  620. // Bail if we can't compress to at least this.
  621. dstLimit := len(src) - len(src)>>5 - 5
  622. // nextEmit is where in src the next emitLiteral should start from.
  623. nextEmit := 0
  624. // The encoded form must start with a literal, as there are no previous
  625. // bytes to copy, so we start looking for hash matches at s == 1.
  626. s := 1
  627. cv := load64(src, s)
  628. // We search for a repeat at -1, but don't output repeats when nextEmit == 0
  629. repeat := 1
  630. for {
  631. candidate := 0
  632. for {
  633. // Next src position to check
  634. nextS := s + (s-nextEmit)>>5 + 4
  635. if nextS > sLimit {
  636. goto emitRemainder
  637. }
  638. hash0 := hash6(cv, tableBits)
  639. hash1 := hash6(cv>>8, tableBits)
  640. candidate = int(table[hash0])
  641. candidate2 := int(table[hash1])
  642. table[hash0] = uint16(s)
  643. table[hash1] = uint16(s + 1)
  644. hash2 := hash6(cv>>16, tableBits)
  645. // Check repeat at offset checkRep.
  646. const checkRep = 1
  647. if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
  648. base := s + checkRep
  649. // Extend back
  650. for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
  651. i--
  652. base--
  653. }
  654. // Bail if we exceed the maximum size.
  655. if d+(base-nextEmit) > dstLimit {
  656. return 0
  657. }
  658. d += emitLiteral(dst[d:], src[nextEmit:base])
  659. // Extend forward
  660. candidate := s - repeat + 4 + checkRep
  661. s += 4 + checkRep
  662. for s <= sLimit {
  663. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  664. s += bits.TrailingZeros64(diff) >> 3
  665. break
  666. }
  667. s += 8
  668. candidate += 8
  669. }
  670. d += emitCopyNoRepeat(dst[d:], repeat, s-base)
  671. nextEmit = s
  672. if s >= sLimit {
  673. goto emitRemainder
  674. }
  675. cv = load64(src, s)
  676. continue
  677. }
  678. if uint32(cv) == load32(src, candidate) {
  679. break
  680. }
  681. candidate = int(table[hash2])
  682. if uint32(cv>>8) == load32(src, candidate2) {
  683. table[hash2] = uint16(s + 2)
  684. candidate = candidate2
  685. s++
  686. break
  687. }
  688. table[hash2] = uint16(s + 2)
  689. if uint32(cv>>16) == load32(src, candidate) {
  690. s += 2
  691. break
  692. }
  693. cv = load64(src, nextS)
  694. s = nextS
  695. }
  696. // Extend backwards
  697. for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
  698. candidate--
  699. s--
  700. }
  701. // Bail if we exceed the maximum size.
  702. if d+(s-nextEmit) > dstLimit {
  703. return 0
  704. }
  705. // A 4-byte match has been found. We'll later see if more than 4 bytes
  706. // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
  707. // them as literal bytes.
  708. d += emitLiteral(dst[d:], src[nextEmit:s])
  709. // Call emitCopy, and then see if another emitCopy could be our next
  710. // move. Repeat until we find no match for the input immediately after
  711. // what was consumed by the last emitCopy call.
  712. //
  713. // If we exit this loop normally then we need to call emitLiteral next,
  714. // though we don't yet know how big the literal will be. We handle that
  715. // by proceeding to the next iteration of the main loop. We also can
  716. // exit this loop via goto if we get close to exhausting the input.
  717. for {
  718. // Invariant: we have a 4-byte match at s, and no need to emit any
  719. // literal bytes prior to s.
  720. base := s
  721. repeat = base - candidate
  722. // Extend the 4-byte match as long as possible.
  723. s += 4
  724. candidate += 4
  725. for s <= len(src)-8 {
  726. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  727. s += bits.TrailingZeros64(diff) >> 3
  728. break
  729. }
  730. s += 8
  731. candidate += 8
  732. }
  733. d += emitCopyNoRepeat(dst[d:], repeat, s-base)
  734. if false {
  735. // Validate match.
  736. a := src[base:s]
  737. b := src[base-repeat : base-repeat+(s-base)]
  738. if !bytes.Equal(a, b) {
  739. panic("mismatch")
  740. }
  741. }
  742. nextEmit = s
  743. if s >= sLimit {
  744. goto emitRemainder
  745. }
  746. if d > dstLimit {
  747. // Do we have space for more, if not bail.
  748. return 0
  749. }
  750. // Check for an immediate match, otherwise start search at s+1
  751. x := load64(src, s-2)
  752. m2Hash := hash6(x, tableBits)
  753. currHash := hash6(x>>16, tableBits)
  754. candidate = int(table[currHash])
  755. table[m2Hash] = uint16(s - 2)
  756. table[currHash] = uint16(s)
  757. if uint32(x>>16) != load32(src, candidate) {
  758. cv = load64(src, s+1)
  759. s++
  760. break
  761. }
  762. }
  763. }
  764. emitRemainder:
  765. if nextEmit < len(src) {
  766. // Bail if we exceed the maximum size.
  767. if d+len(src)-nextEmit > dstLimit {
  768. return 0
  769. }
  770. d += emitLiteral(dst[d:], src[nextEmit:])
  771. }
  772. return d
  773. }
  774. // encodeBlockGo encodes a non-empty src to a guaranteed-large-enough dst. It
  775. // assumes that the varint-encoded length of the decompressed bytes has already
  776. // been written.
  777. //
  778. // It also assumes that:
  779. //
  780. // len(dst) >= MaxEncodedLen(len(src)) &&
  781. // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
  782. func encodeBlockDictGo(dst, src []byte, dict *Dict) (d int) {
  783. // Initialize the hash table.
  784. const (
  785. tableBits = 14
  786. maxTableSize = 1 << tableBits
  787. maxAhead = 8 // maximum bytes ahead without checking sLimit
  788. debug = false
  789. )
  790. dict.initFast()
  791. var table [maxTableSize]uint32
  792. // sLimit is when to stop looking for offset/length copies. The inputMargin
  793. // lets us use a fast path for emitLiteral in the main loop, while we are
  794. // looking for copies.
  795. sLimit := min(len(src)-inputMargin, MaxDictSrcOffset-maxAhead)
  796. // Bail if we can't compress to at least this.
  797. dstLimit := len(src) - len(src)>>5 - 5
  798. // nextEmit is where in src the next emitLiteral should start from.
  799. nextEmit := 0
  800. // The encoded form can start with a dict entry (copy or repeat).
  801. s := 0
  802. // Convert dict repeat to offset
  803. repeat := len(dict.dict) - dict.repeat
  804. cv := load64(src, 0)
  805. // While in dict
  806. searchDict:
  807. for {
  808. // Next src position to check
  809. nextS := s + (s-nextEmit)>>6 + 4
  810. hash0 := hash6(cv, tableBits)
  811. hash1 := hash6(cv>>8, tableBits)
  812. if nextS > sLimit {
  813. if debug {
  814. fmt.Println("slimit reached", s, nextS)
  815. }
  816. break searchDict
  817. }
  818. candidateDict := int(dict.fastTable[hash0])
  819. candidateDict2 := int(dict.fastTable[hash1])
  820. candidate2 := int(table[hash1])
  821. candidate := int(table[hash0])
  822. table[hash0] = uint32(s)
  823. table[hash1] = uint32(s + 1)
  824. hash2 := hash6(cv>>16, tableBits)
  825. // Check repeat at offset checkRep.
  826. const checkRep = 1
  827. if repeat > s {
  828. candidate := len(dict.dict) - repeat + s
  829. if repeat-s >= 4 && uint32(cv) == load32(dict.dict, candidate) {
  830. // Extend back
  831. base := s
  832. for i := candidate; base > nextEmit && i > 0 && dict.dict[i-1] == src[base-1]; {
  833. i--
  834. base--
  835. }
  836. // Bail if we exceed the maximum size.
  837. if d+(base-nextEmit) > dstLimit {
  838. return 0
  839. }
  840. d += emitLiteral(dst[d:], src[nextEmit:base])
  841. if debug && nextEmit != base {
  842. fmt.Println("emitted ", base-nextEmit, "literals")
  843. }
  844. s += 4
  845. candidate += 4
  846. for candidate < len(dict.dict)-8 && s <= len(src)-8 {
  847. if diff := load64(src, s) ^ load64(dict.dict, candidate); diff != 0 {
  848. s += bits.TrailingZeros64(diff) >> 3
  849. break
  850. }
  851. s += 8
  852. candidate += 8
  853. }
  854. d += emitRepeat(dst[d:], repeat, s-base)
  855. if debug {
  856. fmt.Println("emitted dict repeat length", s-base, "offset:", repeat, "s:", s)
  857. }
  858. nextEmit = s
  859. if s >= sLimit {
  860. break searchDict
  861. }
  862. cv = load64(src, s)
  863. continue
  864. }
  865. } else if uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
  866. base := s + checkRep
  867. // Extend back
  868. for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
  869. i--
  870. base--
  871. }
  872. d += emitLiteral(dst[d:], src[nextEmit:base])
  873. if debug && nextEmit != base {
  874. fmt.Println("emitted ", base-nextEmit, "literals")
  875. }
  876. // Extend forward
  877. candidate := s - repeat + 4 + checkRep
  878. s += 4 + checkRep
  879. for s <= sLimit {
  880. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  881. s += bits.TrailingZeros64(diff) >> 3
  882. break
  883. }
  884. s += 8
  885. candidate += 8
  886. }
  887. if debug {
  888. // Validate match.
  889. if s <= candidate {
  890. panic("s <= candidate")
  891. }
  892. a := src[base:s]
  893. b := src[base-repeat : base-repeat+(s-base)]
  894. if !bytes.Equal(a, b) {
  895. panic("mismatch")
  896. }
  897. }
  898. if nextEmit > 0 {
  899. // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
  900. d += emitRepeat(dst[d:], repeat, s-base)
  901. } else {
  902. // First match, cannot be repeat.
  903. d += emitCopy(dst[d:], repeat, s-base)
  904. }
  905. nextEmit = s
  906. if s >= sLimit {
  907. break searchDict
  908. }
  909. if debug {
  910. fmt.Println("emitted reg repeat", s-base, "s:", s)
  911. }
  912. cv = load64(src, s)
  913. continue searchDict
  914. }
  915. if s == 0 {
  916. cv = load64(src, nextS)
  917. s = nextS
  918. continue searchDict
  919. }
  920. // Start with table. These matches will always be closer.
  921. if uint32(cv) == load32(src, candidate) {
  922. goto emitMatch
  923. }
  924. candidate = int(table[hash2])
  925. if uint32(cv>>8) == load32(src, candidate2) {
  926. table[hash2] = uint32(s + 2)
  927. candidate = candidate2
  928. s++
  929. goto emitMatch
  930. }
  931. // Check dict. Dicts have longer offsets, so we want longer matches.
  932. if cv == load64(dict.dict, candidateDict) {
  933. table[hash2] = uint32(s + 2)
  934. goto emitDict
  935. }
  936. candidateDict = int(dict.fastTable[hash2])
  937. // Check if upper 7 bytes match
  938. if candidateDict2 >= 1 {
  939. if cv^load64(dict.dict, candidateDict2-1) < (1 << 8) {
  940. table[hash2] = uint32(s + 2)
  941. candidateDict = candidateDict2
  942. s++
  943. goto emitDict
  944. }
  945. }
  946. table[hash2] = uint32(s + 2)
  947. if uint32(cv>>16) == load32(src, candidate) {
  948. s += 2
  949. goto emitMatch
  950. }
  951. if candidateDict >= 2 {
  952. // Check if upper 6 bytes match
  953. if cv^load64(dict.dict, candidateDict-2) < (1 << 16) {
  954. s += 2
  955. goto emitDict
  956. }
  957. }
  958. cv = load64(src, nextS)
  959. s = nextS
  960. continue searchDict
  961. emitDict:
  962. {
  963. if debug {
  964. if load32(dict.dict, candidateDict) != load32(src, s) {
  965. panic("dict emit mismatch")
  966. }
  967. }
  968. // Extend backwards.
  969. // The top bytes will be rechecked to get the full match.
  970. for candidateDict > 0 && s > nextEmit && dict.dict[candidateDict-1] == src[s-1] {
  971. candidateDict--
  972. s--
  973. }
  974. // Bail if we exceed the maximum size.
  975. if d+(s-nextEmit) > dstLimit {
  976. return 0
  977. }
  978. // A 4-byte match has been found. We'll later see if more than 4 bytes
  979. // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
  980. // them as literal bytes.
  981. d += emitLiteral(dst[d:], src[nextEmit:s])
  982. if debug && nextEmit != s {
  983. fmt.Println("emitted ", s-nextEmit, "literals")
  984. }
  985. {
  986. // Invariant: we have a 4-byte match at s, and no need to emit any
  987. // literal bytes prior to s.
  988. base := s
  989. repeat = s + (len(dict.dict)) - candidateDict
  990. // Extend the 4-byte match as long as possible.
  991. s += 4
  992. candidateDict += 4
  993. for s <= len(src)-8 && len(dict.dict)-candidateDict >= 8 {
  994. if diff := load64(src, s) ^ load64(dict.dict, candidateDict); diff != 0 {
  995. s += bits.TrailingZeros64(diff) >> 3
  996. break
  997. }
  998. s += 8
  999. candidateDict += 8
  1000. }
  1001. // Matches longer than 64 are split.
  1002. if s <= sLimit || s-base < 8 {
  1003. d += emitCopy(dst[d:], repeat, s-base)
  1004. } else {
  1005. // Split to ensure we don't start a copy within next block
  1006. d += emitCopy(dst[d:], repeat, 4)
  1007. d += emitRepeat(dst[d:], repeat, s-base-4)
  1008. }
  1009. if false {
  1010. // Validate match.
  1011. if s <= candidate {
  1012. panic("s <= candidate")
  1013. }
  1014. a := src[base:s]
  1015. b := dict.dict[base-repeat : base-repeat+(s-base)]
  1016. if !bytes.Equal(a, b) {
  1017. panic("mismatch")
  1018. }
  1019. }
  1020. if debug {
  1021. fmt.Println("emitted dict copy, length", s-base, "offset:", repeat, "s:", s)
  1022. }
  1023. nextEmit = s
  1024. if s >= sLimit {
  1025. break searchDict
  1026. }
  1027. if d > dstLimit {
  1028. // Do we have space for more, if not bail.
  1029. return 0
  1030. }
  1031. // Index and continue loop to try new candidate.
  1032. x := load64(src, s-2)
  1033. m2Hash := hash6(x, tableBits)
  1034. currHash := hash6(x>>8, tableBits)
  1035. table[m2Hash] = uint32(s - 2)
  1036. table[currHash] = uint32(s - 1)
  1037. cv = load64(src, s)
  1038. }
  1039. continue
  1040. }
  1041. emitMatch:
  1042. // Extend backwards.
  1043. // The top bytes will be rechecked to get the full match.
  1044. for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
  1045. candidate--
  1046. s--
  1047. }
  1048. // Bail if we exceed the maximum size.
  1049. if d+(s-nextEmit) > dstLimit {
  1050. return 0
  1051. }
  1052. // A 4-byte match has been found. We'll later see if more than 4 bytes
  1053. // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
  1054. // them as literal bytes.
  1055. d += emitLiteral(dst[d:], src[nextEmit:s])
  1056. if debug && nextEmit != s {
  1057. fmt.Println("emitted ", s-nextEmit, "literals")
  1058. }
  1059. // Call emitCopy, and then see if another emitCopy could be our next
  1060. // move. Repeat until we find no match for the input immediately after
  1061. // what was consumed by the last emitCopy call.
  1062. //
  1063. // If we exit this loop normally then we need to call emitLiteral next,
  1064. // though we don't yet know how big the literal will be. We handle that
  1065. // by proceeding to the next iteration of the main loop. We also can
  1066. // exit this loop via goto if we get close to exhausting the input.
  1067. for {
  1068. // Invariant: we have a 4-byte match at s, and no need to emit any
  1069. // literal bytes prior to s.
  1070. base := s
  1071. repeat = base - candidate
  1072. // Extend the 4-byte match as long as possible.
  1073. s += 4
  1074. candidate += 4
  1075. for s <= len(src)-8 {
  1076. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  1077. s += bits.TrailingZeros64(diff) >> 3
  1078. break
  1079. }
  1080. s += 8
  1081. candidate += 8
  1082. }
  1083. d += emitCopy(dst[d:], repeat, s-base)
  1084. if debug {
  1085. // Validate match.
  1086. if s <= candidate {
  1087. panic("s <= candidate")
  1088. }
  1089. a := src[base:s]
  1090. b := src[base-repeat : base-repeat+(s-base)]
  1091. if !bytes.Equal(a, b) {
  1092. panic("mismatch")
  1093. }
  1094. }
  1095. if debug {
  1096. fmt.Println("emitted src copy, length", s-base, "offset:", repeat, "s:", s)
  1097. }
  1098. nextEmit = s
  1099. if s >= sLimit {
  1100. break searchDict
  1101. }
  1102. if d > dstLimit {
  1103. // Do we have space for more, if not bail.
  1104. return 0
  1105. }
  1106. // Check for an immediate match, otherwise start search at s+1
  1107. x := load64(src, s-2)
  1108. m2Hash := hash6(x, tableBits)
  1109. currHash := hash6(x>>16, tableBits)
  1110. candidate = int(table[currHash])
  1111. table[m2Hash] = uint32(s - 2)
  1112. table[currHash] = uint32(s)
  1113. if debug && s == candidate {
  1114. panic("s == candidate")
  1115. }
  1116. if uint32(x>>16) != load32(src, candidate) {
  1117. cv = load64(src, s+1)
  1118. s++
  1119. break
  1120. }
  1121. }
  1122. }
  1123. // Search without dict:
  1124. if repeat > s {
  1125. repeat = 0
  1126. }
  1127. // No more dict
  1128. sLimit = len(src) - inputMargin
  1129. if s >= sLimit {
  1130. goto emitRemainder
  1131. }
  1132. if debug {
  1133. fmt.Println("non-dict matching at", s, "repeat:", repeat)
  1134. }
  1135. cv = load64(src, s)
  1136. if debug {
  1137. fmt.Println("now", s, "->", sLimit, "out:", d, "left:", len(src)-s, "nextemit:", nextEmit, "dstLimit:", dstLimit, "s:", s)
  1138. }
  1139. for {
  1140. candidate := 0
  1141. for {
  1142. // Next src position to check
  1143. nextS := s + (s-nextEmit)>>6 + 4
  1144. if nextS > sLimit {
  1145. goto emitRemainder
  1146. }
  1147. hash0 := hash6(cv, tableBits)
  1148. hash1 := hash6(cv>>8, tableBits)
  1149. candidate = int(table[hash0])
  1150. candidate2 := int(table[hash1])
  1151. table[hash0] = uint32(s)
  1152. table[hash1] = uint32(s + 1)
  1153. hash2 := hash6(cv>>16, tableBits)
  1154. // Check repeat at offset checkRep.
  1155. const checkRep = 1
  1156. if repeat > 0 && uint32(cv>>(checkRep*8)) == load32(src, s-repeat+checkRep) {
  1157. base := s + checkRep
  1158. // Extend back
  1159. for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
  1160. i--
  1161. base--
  1162. }
  1163. // Bail if we exceed the maximum size.
  1164. if d+(base-nextEmit) > dstLimit {
  1165. return 0
  1166. }
  1167. d += emitLiteral(dst[d:], src[nextEmit:base])
  1168. if debug && nextEmit != base {
  1169. fmt.Println("emitted ", base-nextEmit, "literals")
  1170. }
  1171. // Extend forward
  1172. candidate := s - repeat + 4 + checkRep
  1173. s += 4 + checkRep
  1174. for s <= sLimit {
  1175. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  1176. s += bits.TrailingZeros64(diff) >> 3
  1177. break
  1178. }
  1179. s += 8
  1180. candidate += 8
  1181. }
  1182. if debug {
  1183. // Validate match.
  1184. if s <= candidate {
  1185. panic("s <= candidate")
  1186. }
  1187. a := src[base:s]
  1188. b := src[base-repeat : base-repeat+(s-base)]
  1189. if !bytes.Equal(a, b) {
  1190. panic("mismatch")
  1191. }
  1192. }
  1193. if nextEmit > 0 {
  1194. // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
  1195. d += emitRepeat(dst[d:], repeat, s-base)
  1196. } else {
  1197. // First match, cannot be repeat.
  1198. d += emitCopy(dst[d:], repeat, s-base)
  1199. }
  1200. if debug {
  1201. fmt.Println("emitted src repeat length", s-base, "offset:", repeat, "s:", s)
  1202. }
  1203. nextEmit = s
  1204. if s >= sLimit {
  1205. goto emitRemainder
  1206. }
  1207. cv = load64(src, s)
  1208. continue
  1209. }
  1210. if uint32(cv) == load32(src, candidate) {
  1211. break
  1212. }
  1213. candidate = int(table[hash2])
  1214. if uint32(cv>>8) == load32(src, candidate2) {
  1215. table[hash2] = uint32(s + 2)
  1216. candidate = candidate2
  1217. s++
  1218. break
  1219. }
  1220. table[hash2] = uint32(s + 2)
  1221. if uint32(cv>>16) == load32(src, candidate) {
  1222. s += 2
  1223. break
  1224. }
  1225. cv = load64(src, nextS)
  1226. s = nextS
  1227. }
  1228. // Extend backwards.
  1229. // The top bytes will be rechecked to get the full match.
  1230. for candidate > 0 && s > nextEmit && src[candidate-1] == src[s-1] {
  1231. candidate--
  1232. s--
  1233. }
  1234. // Bail if we exceed the maximum size.
  1235. if d+(s-nextEmit) > dstLimit {
  1236. return 0
  1237. }
  1238. // A 4-byte match has been found. We'll later see if more than 4 bytes
  1239. // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
  1240. // them as literal bytes.
  1241. d += emitLiteral(dst[d:], src[nextEmit:s])
  1242. if debug && nextEmit != s {
  1243. fmt.Println("emitted ", s-nextEmit, "literals")
  1244. }
  1245. // Call emitCopy, and then see if another emitCopy could be our next
  1246. // move. Repeat until we find no match for the input immediately after
  1247. // what was consumed by the last emitCopy call.
  1248. //
  1249. // If we exit this loop normally then we need to call emitLiteral next,
  1250. // though we don't yet know how big the literal will be. We handle that
  1251. // by proceeding to the next iteration of the main loop. We also can
  1252. // exit this loop via goto if we get close to exhausting the input.
  1253. for {
  1254. // Invariant: we have a 4-byte match at s, and no need to emit any
  1255. // literal bytes prior to s.
  1256. base := s
  1257. repeat = base - candidate
  1258. // Extend the 4-byte match as long as possible.
  1259. s += 4
  1260. candidate += 4
  1261. for s <= len(src)-8 {
  1262. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  1263. s += bits.TrailingZeros64(diff) >> 3
  1264. break
  1265. }
  1266. s += 8
  1267. candidate += 8
  1268. }
  1269. d += emitCopy(dst[d:], repeat, s-base)
  1270. if debug {
  1271. // Validate match.
  1272. if s <= candidate {
  1273. panic("s <= candidate")
  1274. }
  1275. a := src[base:s]
  1276. b := src[base-repeat : base-repeat+(s-base)]
  1277. if !bytes.Equal(a, b) {
  1278. panic("mismatch")
  1279. }
  1280. }
  1281. if debug {
  1282. fmt.Println("emitted src copy, length", s-base, "offset:", repeat, "s:", s)
  1283. }
  1284. nextEmit = s
  1285. if s >= sLimit {
  1286. goto emitRemainder
  1287. }
  1288. if d > dstLimit {
  1289. // Do we have space for more, if not bail.
  1290. return 0
  1291. }
  1292. // Check for an immediate match, otherwise start search at s+1
  1293. x := load64(src, s-2)
  1294. m2Hash := hash6(x, tableBits)
  1295. currHash := hash6(x>>16, tableBits)
  1296. candidate = int(table[currHash])
  1297. table[m2Hash] = uint32(s - 2)
  1298. table[currHash] = uint32(s)
  1299. if debug && s == candidate {
  1300. panic("s == candidate")
  1301. }
  1302. if uint32(x>>16) != load32(src, candidate) {
  1303. cv = load64(src, s+1)
  1304. s++
  1305. break
  1306. }
  1307. }
  1308. }
  1309. emitRemainder:
  1310. if nextEmit < len(src) {
  1311. // Bail if we exceed the maximum size.
  1312. if d+len(src)-nextEmit > dstLimit {
  1313. return 0
  1314. }
  1315. d += emitLiteral(dst[d:], src[nextEmit:])
  1316. if debug && nextEmit != s {
  1317. fmt.Println("emitted ", len(src)-nextEmit, "literals")
  1318. }
  1319. }
  1320. return d
  1321. }