  1. // Copyright 2016 The Snappy-Go Authors. All rights reserved.
  2. // Copyright (c) 2019 Klaus Post. All rights reserved.
  3. // Use of this source code is governed by a BSD-style
  4. // license that can be found in the LICENSE file.
  5. package s2
  6. import (
  7. "bytes"
  8. "fmt"
  9. "math/bits"
  10. )
  11. // hash4 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits.
  12. // Preferably h should be a constant and should always be <32.
  13. func hash4(u uint64, h uint8) uint32 {
  14. const prime4bytes = 2654435761
  15. return (uint32(u) * prime4bytes) >> ((32 - h) & 31)
  16. }
  17. // hash5 returns the hash of the lowest 5 bytes of u to fit in a hash table with h bits.
  18. // Preferably h should be a constant and should always be <64.
  19. func hash5(u uint64, h uint8) uint32 {
  20. const prime5bytes = 889523592379
  21. return uint32(((u << (64 - 40)) * prime5bytes) >> ((64 - h) & 63))
  22. }
  23. // hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
  24. // Preferably h should be a constant and should always be <64.
  25. func hash7(u uint64, h uint8) uint32 {
  26. const prime7bytes = 58295818150454627
  27. return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & 63))
  28. }
  29. // hash8 returns the hash of u to fit in a hash table with h bits.
  30. // Preferably h should be a constant and should always be <64.
  31. func hash8(u uint64, h uint8) uint32 {
  32. const prime8bytes = 0xcf1bbcdcb7a56463
  33. return uint32((u * prime8bytes) >> ((64 - h) & 63))
  34. }
// encodeBlockBetterGo encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
//
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBetterGo(dst, src []byte) (d int) {
	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin
	if len(src) < minNonLiteralBlockSize {
		return 0
	}

	// Initialize the hash tables. Two tables are kept: a long (7-byte) hash
	// for high-quality matches and a short (4-byte) hash as a fallback.
	const (
		// Long hash matches.
		lTableBits    = 17
		maxLTableSize = 1 << lTableBits

		// Short hash matches.
		sTableBits    = 14
		maxSTableSize = 1 << sTableBits
	)

	var lTable [maxLTableSize]uint32
	var sTable [maxSTableSize]uint32

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - len(src)>>5 - 6

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	cv := load64(src, s)

	// We initialize repeat to 0, so we never match on first attempt
	repeat := 0

	for {
		candidateL := 0
		nextS := 0
		for {
			// Next src position to check. The skip distance grows the longer
			// we go without finding a match.
			nextS = s + (s-nextEmit)>>7 + 1
			if nextS > sLimit {
				goto emitRemainder
			}
			hashL := hash7(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL = int(lTable[hashL])
			candidateS := int(sTable[hashS])
			lTable[hashL] = uint32(s)
			sTable[hashS] = uint32(s)

			valLong := load64(src, candidateL)
			valShort := load64(src, candidateS)

			// If long matches at least 8 bytes, use that.
			if cv == valLong {
				break
			}
			if cv == valShort {
				candidateL = candidateS
				break
			}

			// Check repeat at offset checkRep.
			const checkRep = 1
			// Minimum length of a repeat. Tested with various values.
			// While 4-5 offers improvements in some, 6 reduces
			// regressions significantly.
			const wantRepeatBytes = 6
			const repeatMask = ((1 << (wantRepeatBytes * 8)) - 1) << (8 * checkRep)
			// NOTE: this repeat fast path is deliberately disabled ("false &&");
			// the code is kept for reference/experimentation.
			if false && repeat > 0 && cv&repeatMask == load64(src, s-repeat)&repeatMask {
				base := s + checkRep
				// Extend back
				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
					i--
					base--
				}
				d += emitLiteral(dst[d:], src[nextEmit:base])

				// Extend forward
				candidate := s - repeat + wantRepeatBytes + checkRep
				s += wantRepeatBytes + checkRep
				for s < len(src) {
					if len(src)-s < 8 {
						// Tail: compare byte-by-byte to avoid reading past the end.
						if src[s] == src[candidate] {
							s++
							candidate++
							continue
						}
						break
					}
					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
						s += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					candidate += 8
				}
				// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
				d += emitRepeat(dst[d:], repeat, s-base)
				nextEmit = s
				if s >= sLimit {
					goto emitRemainder
				}

				// Index in-between positions from both ends.
				index0 := base + 1
				index1 := s - 2

				for index0 < index1 {
					cv0 := load64(src, index0)
					cv1 := load64(src, index1)
					lTable[hash7(cv0, lTableBits)] = uint32(index0)
					sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)

					lTable[hash7(cv1, lTableBits)] = uint32(index1)
					sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
					index0 += 2
					index1 -= 2
				}

				cv = load64(src, s)
				continue
			}

			// Long likely matches 7, so take that.
			if uint32(cv) == uint32(valLong) {
				break
			}

			// Check our short candidate
			if uint32(cv) == uint32(valShort) {
				// Try a long candidate at s+1
				hashL = hash7(cv>>8, lTableBits)
				candidateL = int(lTable[hashL])
				lTable[hashL] = uint32(s + 1)
				if uint32(cv>>8) == load32(src, candidateL) {
					s++
					break
				}
				// Use our short candidate.
				candidateL = candidateS
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards
		for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
			candidateL--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := base - candidateL

		// Extend the 4-byte match as long as possible.
		s += 4
		candidateL += 4
		for s < len(src) {
			if len(src)-s < 8 {
				// Tail: compare byte-by-byte to avoid reading past the end.
				if src[s] == src[candidateL] {
					s++
					candidateL++
					continue
				}
				break
			}
			if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
				s += bits.TrailingZeros64(diff) >> 3
				break
			}
			s += 8
			candidateL += 8
		}

		// A short match at a long offset costs more to encode than it saves.
		if offset > 65535 && s-base <= 5 && repeat != offset {
			// Bail if the match is equal or worse to the encoding.
			s = nextS + 1
			if s >= sLimit {
				goto emitRemainder
			}
			cv = load64(src, s)
			continue
		}

		d += emitLiteral(dst[d:], src[nextEmit:base])
		if repeat == offset {
			// Same offset as last copy: a repeat code is cheaper.
			d += emitRepeat(dst[d:], offset, s-base)
		} else {
			d += emitCopy(dst[d:], offset, s-base)
			repeat = offset
		}

		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Do we have space for more, if not bail.
			return 0
		}

		// Index short & long
		index0 := base + 1
		index1 := s - 2

		cv0 := load64(src, index0)
		cv1 := load64(src, index1)
		lTable[hash7(cv0, lTableBits)] = uint32(index0)
		sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)

		// lTable could be postponed, but very minor difference.
		lTable[hash7(cv1, lTableBits)] = uint32(index1)
		sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
		index0 += 1
		index1 -= 1
		cv = load64(src, s)

		// Index large values sparsely in between.
		// We do two starting from different offsets for speed.
		index2 := (index0 + index1 + 1) >> 1
		for index2 < index1 {
			lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
			lTable[hash7(load64(src, index2), lTableBits)] = uint32(index2)
			index0 += 2
			index2 += 2
		}
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
}
// encodeBlockBetterSnappyGo encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// The output is Snappy-compatible: no repeat codes are emitted
// (see emitCopyNoRepeat below).
//
// It also assumes that:
//
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBetterSnappyGo(dst, src []byte) (d int) {
	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin
	if len(src) < minNonLiteralBlockSize {
		return 0
	}

	// Initialize the hash tables.
	const (
		// Long hash matches.
		lTableBits    = 16
		maxLTableSize = 1 << lTableBits

		// Short hash matches.
		sTableBits    = 14
		maxSTableSize = 1 << sTableBits
	)

	var lTable [maxLTableSize]uint32
	var sTable [maxSTableSize]uint32

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - len(src)>>5 - 6

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	cv := load64(src, s)

	// We initialize repeat to 0, so we never match on first attempt
	repeat := 0
	// Cap the search skip so incompressible stretches are not skipped too aggressively.
	const maxSkip = 100

	for {
		candidateL := 0
		nextS := 0
		for {
			// Next src position to check
			nextS = min(s+(s-nextEmit)>>7+1, s+maxSkip)

			if nextS > sLimit {
				goto emitRemainder
			}
			hashL := hash7(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL = int(lTable[hashL])
			candidateS := int(sTable[hashS])
			lTable[hashL] = uint32(s)
			sTable[hashS] = uint32(s)

			if uint32(cv) == load32(src, candidateL) {
				break
			}

			// Check our short candidate
			if uint32(cv) == load32(src, candidateS) {
				// Try a long candidate at s+1
				hashL = hash7(cv>>8, lTableBits)
				candidateL = int(lTable[hashL])
				lTable[hashL] = uint32(s + 1)
				if uint32(cv>>8) == load32(src, candidateL) {
					s++
					break
				}
				// Use our short candidate.
				candidateL = candidateS
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards
		for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
			candidateL--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := base - candidateL

		// Extend the 4-byte match as long as possible.
		s += 4
		candidateL += 4
		for s < len(src) {
			if len(src)-s < 8 {
				// Tail: compare byte-by-byte to avoid reading past the end.
				if src[s] == src[candidateL] {
					s++
					candidateL++
					continue
				}
				break
			}
			if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
				s += bits.TrailingZeros64(diff) >> 3
				break
			}
			s += 8
			candidateL += 8
		}

		// A short match at a long offset costs more to encode than it saves.
		if offset > 65535 && s-base <= 5 && repeat != offset {
			// Bail if the match is equal or worse to the encoding.
			s = nextS + 1
			if s >= sLimit {
				goto emitRemainder
			}
			cv = load64(src, s)
			continue
		}

		d += emitLiteral(dst[d:], src[nextEmit:base])
		d += emitCopyNoRepeat(dst[d:], offset, s-base)
		repeat = offset

		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Do we have space for more, if not bail.
			return 0
		}

		// Index short & long
		index0 := base + 1
		index1 := s - 2

		cv0 := load64(src, index0)
		cv1 := load64(src, index1)
		lTable[hash7(cv0, lTableBits)] = uint32(index0)
		sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)

		lTable[hash7(cv1, lTableBits)] = uint32(index1)
		sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
		index0 += 1
		index1 -= 1
		cv = load64(src, s)

		// Index large values sparsely in between.
		// We do two starting from different offsets for speed.
		index2 := (index0 + index1 + 1) >> 1
		for index2 < index1 {
			lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
			lTable[hash7(load64(src, index2), lTableBits)] = uint32(index2)
			index0 += 2
			index2 += 2
		}
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
}
// encodeBlockBetterGo64K encodes a non-empty src to a guaranteed-large-enough
// dst. It assumes that the varint-encoded length of the decompressed bytes has
// already been written.
//
// Variant of encodeBlockBetterGo for small blocks: positions are stored as
// uint16 in the hash tables, so src must be at most 64KB.
//
// It also assumes that:
//
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBetterGo64K(dst, src []byte) (d int) {
	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin
	if len(src) < minNonLiteralBlockSize {
		return 0
	}

	// Initialize the hash tables.
	// Use smaller tables for smaller blocks
	const (
		// Long hash matches.
		lTableBits    = 16
		maxLTableSize = 1 << lTableBits

		// Short hash matches.
		sTableBits    = 13
		maxSTableSize = 1 << sTableBits
	)

	var lTable [maxLTableSize]uint16
	var sTable [maxSTableSize]uint16

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - len(src)>>5 - 6

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	cv := load64(src, s)

	// We initialize repeat to 0, so we never match on first attempt
	repeat := 0

	for {
		candidateL := 0
		nextS := 0
		for {
			// Next src position to check. Faster skip ramp (>>6) than the
			// large-block variant.
			nextS = s + (s-nextEmit)>>6 + 1
			if nextS > sLimit {
				goto emitRemainder
			}
			hashL := hash7(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL = int(lTable[hashL])
			candidateS := int(sTable[hashS])
			lTable[hashL] = uint16(s)
			sTable[hashS] = uint16(s)

			valLong := load64(src, candidateL)
			valShort := load64(src, candidateS)

			// If long matches at least 8 bytes, use that.
			if cv == valLong {
				break
			}
			if cv == valShort {
				candidateL = candidateS
				break
			}

			// Check repeat at offset checkRep.
			const checkRep = 1
			// Minimum length of a repeat. Tested with various values.
			// While 4-5 offers improvements in some, 6 reduces
			// regressions significantly.
			const wantRepeatBytes = 6
			const repeatMask = ((1 << (wantRepeatBytes * 8)) - 1) << (8 * checkRep)
			// NOTE: this repeat fast path is deliberately disabled ("false &&");
			// the code is kept for reference/experimentation.
			if false && repeat > 0 && cv&repeatMask == load64(src, s-repeat)&repeatMask {
				base := s + checkRep
				// Extend back
				for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
					i--
					base--
				}
				d += emitLiteral(dst[d:], src[nextEmit:base])

				// Extend forward
				candidate := s - repeat + wantRepeatBytes + checkRep
				s += wantRepeatBytes + checkRep
				for s < len(src) {
					if len(src)-s < 8 {
						// Tail: compare byte-by-byte to avoid reading past the end.
						if src[s] == src[candidate] {
							s++
							candidate++
							continue
						}
						break
					}
					if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
						s += bits.TrailingZeros64(diff) >> 3
						break
					}
					s += 8
					candidate += 8
				}
				// same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
				d += emitRepeat(dst[d:], repeat, s-base)
				nextEmit = s
				if s >= sLimit {
					goto emitRemainder
				}

				// Index in-between positions from both ends.
				index0 := base + 1
				index1 := s - 2

				for index0 < index1 {
					cv0 := load64(src, index0)
					cv1 := load64(src, index1)
					lTable[hash7(cv0, lTableBits)] = uint16(index0)
					sTable[hash4(cv0>>8, sTableBits)] = uint16(index0 + 1)

					lTable[hash7(cv1, lTableBits)] = uint16(index1)
					sTable[hash4(cv1>>8, sTableBits)] = uint16(index1 + 1)
					index0 += 2
					index1 -= 2
				}

				cv = load64(src, s)
				continue
			}

			// Long likely matches 7, so take that.
			if uint32(cv) == uint32(valLong) {
				break
			}

			// Check our short candidate
			if uint32(cv) == uint32(valShort) {
				// Try a long candidate at s+1
				hashL = hash7(cv>>8, lTableBits)
				candidateL = int(lTable[hashL])
				lTable[hashL] = uint16(s + 1)
				if uint32(cv>>8) == load32(src, candidateL) {
					s++
					break
				}
				// Use our short candidate.
				candidateL = candidateS
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards
		for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
			candidateL--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := base - candidateL

		// Extend the 4-byte match as long as possible.
		s += 4
		candidateL += 4
		for s < len(src) {
			if len(src)-s < 8 {
				// Tail: compare byte-by-byte to avoid reading past the end.
				if src[s] == src[candidateL] {
					s++
					candidateL++
					continue
				}
				break
			}
			if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
				s += bits.TrailingZeros64(diff) >> 3
				break
			}
			s += 8
			candidateL += 8
		}

		// No long-offset bail here: offsets in a <=64K block always fit 16 bits.
		d += emitLiteral(dst[d:], src[nextEmit:base])
		if repeat == offset {
			// Same offset as last copy: a repeat code is cheaper.
			d += emitRepeat(dst[d:], offset, s-base)
		} else {
			d += emitCopy(dst[d:], offset, s-base)
			repeat = offset
		}

		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Do we have space for more, if not bail.
			return 0
		}

		// Index short & long
		index0 := base + 1
		index1 := s - 2

		cv0 := load64(src, index0)
		cv1 := load64(src, index1)
		lTable[hash7(cv0, lTableBits)] = uint16(index0)
		sTable[hash4(cv0>>8, sTableBits)] = uint16(index0 + 1)

		// lTable could be postponed, but very minor difference.
		lTable[hash7(cv1, lTableBits)] = uint16(index1)
		sTable[hash4(cv1>>8, sTableBits)] = uint16(index1 + 1)
		index0 += 1
		index1 -= 1
		cv = load64(src, s)

		// Index large values sparsely in between.
		// We do two starting from different offsets for speed.
		index2 := (index0 + index1 + 1) >> 1
		for index2 < index1 {
			lTable[hash7(load64(src, index0), lTableBits)] = uint16(index0)
			lTable[hash7(load64(src, index2), lTableBits)] = uint16(index2)
			index0 += 2
			index2 += 2
		}
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
}
// encodeBlockBetterSnappyGo64K encodes a non-empty src to a
// guaranteed-large-enough dst. It assumes that the varint-encoded length of
// the decompressed bytes has already been written.
//
// Snappy-compatible variant for small blocks: positions are stored as uint16
// in the hash tables, so src must be at most 64KB, and no repeat codes are
// emitted (see emitCopyNoRepeat below).
//
// It also assumes that:
//
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlockBetterSnappyGo64K(dst, src []byte) (d int) {
	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin
	if len(src) < minNonLiteralBlockSize {
		return 0
	}

	// Initialize the hash tables.
	// Use smaller tables for smaller blocks
	const (
		// Long hash matches.
		lTableBits    = 15
		maxLTableSize = 1 << lTableBits

		// Short hash matches.
		sTableBits    = 13
		maxSTableSize = 1 << sTableBits
	)

	var lTable [maxLTableSize]uint16
	var sTable [maxSTableSize]uint16

	// Bail if we can't compress to at least this.
	dstLimit := len(src) - len(src)>>5 - 6

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	cv := load64(src, s)

	// Cap the search skip so incompressible stretches are not skipped too aggressively.
	const maxSkip = 100

	for {
		candidateL := 0
		nextS := 0
		for {
			// Next src position to check
			nextS = min(s+(s-nextEmit)>>6+1, s+maxSkip)

			if nextS > sLimit {
				goto emitRemainder
			}
			hashL := hash7(cv, lTableBits)
			hashS := hash4(cv, sTableBits)
			candidateL = int(lTable[hashL])
			candidateS := int(sTable[hashS])
			lTable[hashL] = uint16(s)
			sTable[hashS] = uint16(s)

			if uint32(cv) == load32(src, candidateL) {
				break
			}

			// Check our short candidate
			if uint32(cv) == load32(src, candidateS) {
				// Try a long candidate at s+1
				hashL = hash7(cv>>8, lTableBits)
				candidateL = int(lTable[hashL])
				lTable[hashL] = uint16(s + 1)
				if uint32(cv>>8) == load32(src, candidateL) {
					s++
					break
				}
				// Use our short candidate.
				candidateL = candidateS
				break
			}

			cv = load64(src, nextS)
			s = nextS
		}

		// Extend backwards
		for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
			candidateL--
			s--
		}

		// Bail if we exceed the maximum size.
		if d+(s-nextEmit) > dstLimit {
			return 0
		}

		base := s
		offset := base - candidateL

		// Extend the 4-byte match as long as possible.
		s += 4
		candidateL += 4
		for s < len(src) {
			if len(src)-s < 8 {
				// Tail: compare byte-by-byte to avoid reading past the end.
				if src[s] == src[candidateL] {
					s++
					candidateL++
					continue
				}
				break
			}
			if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
				s += bits.TrailingZeros64(diff) >> 3
				break
			}
			s += 8
			candidateL += 8
		}

		// No long-offset bail here: offsets in a <=64K block always fit 16 bits.
		d += emitLiteral(dst[d:], src[nextEmit:base])
		d += emitCopyNoRepeat(dst[d:], offset, s-base)

		nextEmit = s
		if s >= sLimit {
			goto emitRemainder
		}

		if d > dstLimit {
			// Do we have space for more, if not bail.
			return 0
		}

		// Index short & long
		index0 := base + 1
		index1 := s - 2

		cv0 := load64(src, index0)
		cv1 := load64(src, index1)
		lTable[hash7(cv0, lTableBits)] = uint16(index0)
		sTable[hash4(cv0>>8, sTableBits)] = uint16(index0 + 1)

		lTable[hash7(cv1, lTableBits)] = uint16(index1)
		sTable[hash4(cv1>>8, sTableBits)] = uint16(index1 + 1)
		index0 += 1
		index1 -= 1
		cv = load64(src, s)

		// Index large values sparsely in between.
		// We do two starting from different offsets for speed.
		index2 := (index0 + index1 + 1) >> 1
		for index2 < index1 {
			lTable[hash7(load64(src, index0), lTableBits)] = uint16(index0)
			lTable[hash7(load64(src, index2), lTableBits)] = uint16(index2)
			index0 += 2
			index2 += 2
		}
	}

emitRemainder:
	if nextEmit < len(src) {
		// Bail if we exceed the maximum size.
		if d+len(src)-nextEmit > dstLimit {
			return 0
		}
		d += emitLiteral(dst[d:], src[nextEmit:])
	}
	return d
}
  770. // encodeBlockBetterDict encodes a non-empty src to a guaranteed-large-enough dst. It
  771. // assumes that the varint-encoded length of the decompressed bytes has already
  772. // been written.
  773. //
  774. // It also assumes that:
  775. //
  776. // len(dst) >= MaxEncodedLen(len(src)) &&
  777. // minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
  778. func encodeBlockBetterDict(dst, src []byte, dict *Dict) (d int) {
  779. // sLimit is when to stop looking for offset/length copies. The inputMargin
  780. // lets us use a fast path for emitLiteral in the main loop, while we are
  781. // looking for copies.
  782. // Initialize the hash tables.
  783. const (
  784. // Long hash matches.
  785. lTableBits = 17
  786. maxLTableSize = 1 << lTableBits
  787. // Short hash matches.
  788. sTableBits = 14
  789. maxSTableSize = 1 << sTableBits
  790. maxAhead = 8 // maximum bytes ahead without checking sLimit
  791. debug = false
  792. )
  793. sLimit := len(src) - inputMargin
  794. if sLimit > MaxDictSrcOffset-maxAhead {
  795. sLimit = MaxDictSrcOffset - maxAhead
  796. }
  797. if len(src) < minNonLiteralBlockSize {
  798. return 0
  799. }
  800. dict.initBetter()
  801. var lTable [maxLTableSize]uint32
  802. var sTable [maxSTableSize]uint32
  803. // Bail if we can't compress to at least this.
  804. dstLimit := len(src) - len(src)>>5 - 6
  805. // nextEmit is where in src the next emitLiteral should start from.
  806. nextEmit := 0
  807. // The encoded form must start with a literal, as there are no previous
  808. // bytes to copy, so we start looking for hash matches at s == 1.
  809. s := 0
  810. cv := load64(src, s)
  811. // We initialize repeat to 0, so we never match on first attempt
  812. repeat := len(dict.dict) - dict.repeat
  813. // While in dict
  814. searchDict:
  815. for {
  816. candidateL := 0
  817. nextS := 0
  818. for {
  819. // Next src position to check
  820. nextS = s + (s-nextEmit)>>7 + 1
  821. if nextS > sLimit {
  822. break searchDict
  823. }
  824. hashL := hash7(cv, lTableBits)
  825. hashS := hash4(cv, sTableBits)
  826. candidateL = int(lTable[hashL])
  827. candidateS := int(sTable[hashS])
  828. dictL := int(dict.betterTableLong[hashL])
  829. dictS := int(dict.betterTableShort[hashS])
  830. lTable[hashL] = uint32(s)
  831. sTable[hashS] = uint32(s)
  832. valLong := load64(src, candidateL)
  833. valShort := load64(src, candidateS)
  834. // If long matches at least 8 bytes, use that.
  835. if s != 0 {
  836. if cv == valLong {
  837. goto emitMatch
  838. }
  839. if cv == valShort {
  840. candidateL = candidateS
  841. goto emitMatch
  842. }
  843. }
  844. // Check dict repeat.
  845. if repeat >= s+4 {
  846. candidate := len(dict.dict) - repeat + s
  847. if candidate > 0 && uint32(cv) == load32(dict.dict, candidate) {
  848. // Extend back
  849. base := s
  850. for i := candidate; base > nextEmit && i > 0 && dict.dict[i-1] == src[base-1]; {
  851. i--
  852. base--
  853. }
  854. d += emitLiteral(dst[d:], src[nextEmit:base])
  855. if debug && nextEmit != base {
  856. fmt.Println("emitted ", base-nextEmit, "literals")
  857. }
  858. s += 4
  859. candidate += 4
  860. for candidate < len(dict.dict)-8 && s <= len(src)-8 {
  861. if diff := load64(src, s) ^ load64(dict.dict, candidate); diff != 0 {
  862. s += bits.TrailingZeros64(diff) >> 3
  863. break
  864. }
  865. s += 8
  866. candidate += 8
  867. }
  868. d += emitRepeat(dst[d:], repeat, s-base)
  869. if debug {
  870. fmt.Println("emitted dict repeat length", s-base, "offset:", repeat, "s:", s)
  871. }
  872. nextEmit = s
  873. if s >= sLimit {
  874. break searchDict
  875. }
  876. // Index in-between
  877. index0 := base + 1
  878. index1 := s - 2
  879. cv = load64(src, s)
  880. for index0 < index1 {
  881. cv0 := load64(src, index0)
  882. cv1 := load64(src, index1)
  883. lTable[hash7(cv0, lTableBits)] = uint32(index0)
  884. sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
  885. lTable[hash7(cv1, lTableBits)] = uint32(index1)
  886. sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
  887. index0 += 2
  888. index1 -= 2
  889. }
  890. continue
  891. }
  892. }
  893. // Don't try to find match at s==0
  894. if s == 0 {
  895. cv = load64(src, nextS)
  896. s = nextS
  897. continue
  898. }
  899. // Long likely matches 7, so take that.
  900. if uint32(cv) == uint32(valLong) {
  901. goto emitMatch
  902. }
  903. // Long dict...
  904. if uint32(cv) == load32(dict.dict, dictL) {
  905. candidateL = dictL
  906. goto emitDict
  907. }
  908. // Check our short candidate
  909. if uint32(cv) == uint32(valShort) {
  910. // Try a long candidate at s+1
  911. hashL = hash7(cv>>8, lTableBits)
  912. candidateL = int(lTable[hashL])
  913. lTable[hashL] = uint32(s + 1)
  914. if uint32(cv>>8) == load32(src, candidateL) {
  915. s++
  916. goto emitMatch
  917. }
  918. // Use our short candidate.
  919. candidateL = candidateS
  920. goto emitMatch
  921. }
  922. if uint32(cv) == load32(dict.dict, dictS) {
  923. // Try a long candidate at s+1
  924. hashL = hash7(cv>>8, lTableBits)
  925. candidateL = int(lTable[hashL])
  926. lTable[hashL] = uint32(s + 1)
  927. if uint32(cv>>8) == load32(src, candidateL) {
  928. s++
  929. goto emitMatch
  930. }
  931. candidateL = dictS
  932. goto emitDict
  933. }
  934. cv = load64(src, nextS)
  935. s = nextS
  936. }
  937. emitDict:
  938. {
  939. if debug {
  940. if load32(dict.dict, candidateL) != load32(src, s) {
  941. panic("dict emit mismatch")
  942. }
  943. }
  944. // Extend backwards.
  945. // The top bytes will be rechecked to get the full match.
  946. for candidateL > 0 && s > nextEmit && dict.dict[candidateL-1] == src[s-1] {
  947. candidateL--
  948. s--
  949. }
  950. // Bail if we exceed the maximum size.
  951. if d+(s-nextEmit) > dstLimit {
  952. return 0
  953. }
  954. // A 4-byte match has been found. We'll later see if more than 4 bytes
  955. // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
  956. // them as literal bytes.
  957. d += emitLiteral(dst[d:], src[nextEmit:s])
  958. if debug && nextEmit != s {
  959. fmt.Println("emitted ", s-nextEmit, "literals")
  960. }
  961. {
  962. // Invariant: we have a 4-byte match at s, and no need to emit any
  963. // literal bytes prior to s.
  964. base := s
  965. offset := s + (len(dict.dict)) - candidateL
  966. // Extend the 4-byte match as long as possible.
  967. s += 4
  968. candidateL += 4
  969. for s <= len(src)-8 && len(dict.dict)-candidateL >= 8 {
  970. if diff := load64(src, s) ^ load64(dict.dict, candidateL); diff != 0 {
  971. s += bits.TrailingZeros64(diff) >> 3
  972. break
  973. }
  974. s += 8
  975. candidateL += 8
  976. }
  977. if repeat == offset {
  978. if debug {
  979. fmt.Println("emitted dict repeat, length", s-base, "offset:", offset, "s:", s, "dict offset:", candidateL)
  980. }
  981. d += emitRepeat(dst[d:], offset, s-base)
  982. } else {
  983. if debug {
  984. fmt.Println("emitted dict copy, length", s-base, "offset:", offset, "s:", s, "dict offset:", candidateL)
  985. }
  986. // Matches longer than 64 are split.
  987. if s <= sLimit || s-base < 8 {
  988. d += emitCopy(dst[d:], offset, s-base)
  989. } else {
  990. // Split to ensure we don't start a copy within next block.
  991. d += emitCopy(dst[d:], offset, 4)
  992. d += emitRepeat(dst[d:], offset, s-base-4)
  993. }
  994. repeat = offset
  995. }
  996. if false {
  997. // Validate match.
  998. if s <= candidateL {
  999. panic("s <= candidate")
  1000. }
  1001. a := src[base:s]
  1002. b := dict.dict[base-repeat : base-repeat+(s-base)]
  1003. if !bytes.Equal(a, b) {
  1004. panic("mismatch")
  1005. }
  1006. }
  1007. nextEmit = s
  1008. if s >= sLimit {
  1009. break searchDict
  1010. }
  1011. if d > dstLimit {
  1012. // Do we have space for more, if not bail.
  1013. return 0
  1014. }
  1015. // Index short & long
  1016. index0 := base + 1
  1017. index1 := s - 2
  1018. cv0 := load64(src, index0)
  1019. cv1 := load64(src, index1)
  1020. lTable[hash7(cv0, lTableBits)] = uint32(index0)
  1021. sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
  1022. lTable[hash7(cv1, lTableBits)] = uint32(index1)
  1023. sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
  1024. index0 += 1
  1025. index1 -= 1
  1026. cv = load64(src, s)
  1027. // index every second long in between.
  1028. for index0 < index1 {
  1029. lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
  1030. lTable[hash7(load64(src, index1), lTableBits)] = uint32(index1)
  1031. index0 += 2
  1032. index1 -= 2
  1033. }
  1034. }
  1035. continue
  1036. }
  1037. emitMatch:
  1038. // Extend backwards
  1039. for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
  1040. candidateL--
  1041. s--
  1042. }
  1043. // Bail if we exceed the maximum size.
  1044. if d+(s-nextEmit) > dstLimit {
  1045. return 0
  1046. }
  1047. base := s
  1048. offset := base - candidateL
  1049. // Extend the 4-byte match as long as possible.
  1050. s += 4
  1051. candidateL += 4
  1052. for s < len(src) {
  1053. if len(src)-s < 8 {
  1054. if src[s] == src[candidateL] {
  1055. s++
  1056. candidateL++
  1057. continue
  1058. }
  1059. break
  1060. }
  1061. if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
  1062. s += bits.TrailingZeros64(diff) >> 3
  1063. break
  1064. }
  1065. s += 8
  1066. candidateL += 8
  1067. }
  1068. if offset > 65535 && s-base <= 5 && repeat != offset {
  1069. // Bail if the match is equal or worse to the encoding.
  1070. s = nextS + 1
  1071. if s >= sLimit {
  1072. goto emitRemainder
  1073. }
  1074. cv = load64(src, s)
  1075. continue
  1076. }
  1077. d += emitLiteral(dst[d:], src[nextEmit:base])
  1078. if debug && nextEmit != s {
  1079. fmt.Println("emitted ", s-nextEmit, "literals")
  1080. }
  1081. if repeat == offset {
  1082. if debug {
  1083. fmt.Println("emitted match repeat, length", s-base, "offset:", offset, "s:", s)
  1084. }
  1085. d += emitRepeat(dst[d:], offset, s-base)
  1086. } else {
  1087. if debug {
  1088. fmt.Println("emitted match copy, length", s-base, "offset:", offset, "s:", s)
  1089. }
  1090. d += emitCopy(dst[d:], offset, s-base)
  1091. repeat = offset
  1092. }
  1093. nextEmit = s
  1094. if s >= sLimit {
  1095. goto emitRemainder
  1096. }
  1097. if d > dstLimit {
  1098. // Do we have space for more, if not bail.
  1099. return 0
  1100. }
  1101. // Index short & long
  1102. index0 := base + 1
  1103. index1 := s - 2
  1104. cv0 := load64(src, index0)
  1105. cv1 := load64(src, index1)
  1106. lTable[hash7(cv0, lTableBits)] = uint32(index0)
  1107. sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
  1108. lTable[hash7(cv1, lTableBits)] = uint32(index1)
  1109. sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
  1110. index0 += 1
  1111. index1 -= 1
  1112. cv = load64(src, s)
  1113. // Index large values sparsely in between.
  1114. // We do two starting from different offsets for speed.
  1115. index2 := (index0 + index1 + 1) >> 1
  1116. for index2 < index1 {
  1117. lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
  1118. lTable[hash7(load64(src, index2), lTableBits)] = uint32(index2)
  1119. index0 += 2
  1120. index2 += 2
  1121. }
  1122. }
  1123. // Search without dict:
  1124. if repeat > s {
  1125. repeat = 0
  1126. }
  1127. // No more dict
  1128. sLimit = len(src) - inputMargin
  1129. if s >= sLimit {
  1130. goto emitRemainder
  1131. }
  1132. cv = load64(src, s)
  1133. if debug {
  1134. fmt.Println("now", s, "->", sLimit, "out:", d, "left:", len(src)-s, "nextemit:", nextEmit, "dstLimit:", dstLimit, "s:", s)
  1135. }
  1136. for {
  1137. candidateL := 0
  1138. nextS := 0
  1139. for {
  1140. // Next src position to check
  1141. nextS = s + (s-nextEmit)>>7 + 1
  1142. if nextS > sLimit {
  1143. goto emitRemainder
  1144. }
  1145. hashL := hash7(cv, lTableBits)
  1146. hashS := hash4(cv, sTableBits)
  1147. candidateL = int(lTable[hashL])
  1148. candidateS := int(sTable[hashS])
  1149. lTable[hashL] = uint32(s)
  1150. sTable[hashS] = uint32(s)
  1151. valLong := load64(src, candidateL)
  1152. valShort := load64(src, candidateS)
  1153. // If long matches at least 8 bytes, use that.
  1154. if cv == valLong {
  1155. break
  1156. }
  1157. if cv == valShort {
  1158. candidateL = candidateS
  1159. break
  1160. }
  1161. // Check repeat at offset checkRep.
  1162. const checkRep = 1
  1163. // Minimum length of a repeat. Tested with various values.
  1164. // While 4-5 offers improvements in some, 6 reduces
  1165. // regressions significantly.
  1166. const wantRepeatBytes = 6
  1167. const repeatMask = ((1 << (wantRepeatBytes * 8)) - 1) << (8 * checkRep)
  1168. if false && repeat > 0 && cv&repeatMask == load64(src, s-repeat)&repeatMask {
  1169. base := s + checkRep
  1170. // Extend back
  1171. for i := base - repeat; base > nextEmit && i > 0 && src[i-1] == src[base-1]; {
  1172. i--
  1173. base--
  1174. }
  1175. d += emitLiteral(dst[d:], src[nextEmit:base])
  1176. // Extend forward
  1177. candidate := s - repeat + wantRepeatBytes + checkRep
  1178. s += wantRepeatBytes + checkRep
  1179. for s < len(src) {
  1180. if len(src)-s < 8 {
  1181. if src[s] == src[candidate] {
  1182. s++
  1183. candidate++
  1184. continue
  1185. }
  1186. break
  1187. }
  1188. if diff := load64(src, s) ^ load64(src, candidate); diff != 0 {
  1189. s += bits.TrailingZeros64(diff) >> 3
  1190. break
  1191. }
  1192. s += 8
  1193. candidate += 8
  1194. }
  1195. // same as `add := emitCopy(dst[d:], repeat, s-base)` but skips storing offset.
  1196. d += emitRepeat(dst[d:], repeat, s-base)
  1197. nextEmit = s
  1198. if s >= sLimit {
  1199. goto emitRemainder
  1200. }
  1201. // Index in-between
  1202. index0 := base + 1
  1203. index1 := s - 2
  1204. for index0 < index1 {
  1205. cv0 := load64(src, index0)
  1206. cv1 := load64(src, index1)
  1207. lTable[hash7(cv0, lTableBits)] = uint32(index0)
  1208. sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
  1209. lTable[hash7(cv1, lTableBits)] = uint32(index1)
  1210. sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
  1211. index0 += 2
  1212. index1 -= 2
  1213. }
  1214. cv = load64(src, s)
  1215. continue
  1216. }
  1217. // Long likely matches 7, so take that.
  1218. if uint32(cv) == uint32(valLong) {
  1219. break
  1220. }
  1221. // Check our short candidate
  1222. if uint32(cv) == uint32(valShort) {
  1223. // Try a long candidate at s+1
  1224. hashL = hash7(cv>>8, lTableBits)
  1225. candidateL = int(lTable[hashL])
  1226. lTable[hashL] = uint32(s + 1)
  1227. if uint32(cv>>8) == load32(src, candidateL) {
  1228. s++
  1229. break
  1230. }
  1231. // Use our short candidate.
  1232. candidateL = candidateS
  1233. break
  1234. }
  1235. cv = load64(src, nextS)
  1236. s = nextS
  1237. }
  1238. // Extend backwards
  1239. for candidateL > 0 && s > nextEmit && src[candidateL-1] == src[s-1] {
  1240. candidateL--
  1241. s--
  1242. }
  1243. // Bail if we exceed the maximum size.
  1244. if d+(s-nextEmit) > dstLimit {
  1245. return 0
  1246. }
  1247. base := s
  1248. offset := base - candidateL
  1249. // Extend the 4-byte match as long as possible.
  1250. s += 4
  1251. candidateL += 4
  1252. for s < len(src) {
  1253. if len(src)-s < 8 {
  1254. if src[s] == src[candidateL] {
  1255. s++
  1256. candidateL++
  1257. continue
  1258. }
  1259. break
  1260. }
  1261. if diff := load64(src, s) ^ load64(src, candidateL); diff != 0 {
  1262. s += bits.TrailingZeros64(diff) >> 3
  1263. break
  1264. }
  1265. s += 8
  1266. candidateL += 8
  1267. }
  1268. if offset > 65535 && s-base <= 5 && repeat != offset {
  1269. // Bail if the match is equal or worse to the encoding.
  1270. s = nextS + 1
  1271. if s >= sLimit {
  1272. goto emitRemainder
  1273. }
  1274. cv = load64(src, s)
  1275. continue
  1276. }
  1277. d += emitLiteral(dst[d:], src[nextEmit:base])
  1278. if repeat == offset {
  1279. d += emitRepeat(dst[d:], offset, s-base)
  1280. } else {
  1281. d += emitCopy(dst[d:], offset, s-base)
  1282. repeat = offset
  1283. }
  1284. nextEmit = s
  1285. if s >= sLimit {
  1286. goto emitRemainder
  1287. }
  1288. if d > dstLimit {
  1289. // Do we have space for more, if not bail.
  1290. return 0
  1291. }
  1292. // Index short & long
  1293. index0 := base + 1
  1294. index1 := s - 2
  1295. cv0 := load64(src, index0)
  1296. cv1 := load64(src, index1)
  1297. lTable[hash7(cv0, lTableBits)] = uint32(index0)
  1298. sTable[hash4(cv0>>8, sTableBits)] = uint32(index0 + 1)
  1299. lTable[hash7(cv1, lTableBits)] = uint32(index1)
  1300. sTable[hash4(cv1>>8, sTableBits)] = uint32(index1 + 1)
  1301. index0 += 1
  1302. index1 -= 1
  1303. cv = load64(src, s)
  1304. // Index large values sparsely in between.
  1305. // We do two starting from different offsets for speed.
  1306. index2 := (index0 + index1 + 1) >> 1
  1307. for index2 < index1 {
  1308. lTable[hash7(load64(src, index0), lTableBits)] = uint32(index0)
  1309. lTable[hash7(load64(src, index2), lTableBits)] = uint32(index2)
  1310. index0 += 2
  1311. index2 += 2
  1312. }
  1313. }
  1314. emitRemainder:
  1315. if nextEmit < len(src) {
  1316. // Bail if we exceed the maximum size.
  1317. if d+len(src)-nextEmit > dstLimit {
  1318. return 0
  1319. }
  1320. d += emitLiteral(dst[d:], src[nextEmit:])
  1321. }
  1322. return d
  1323. }