sketch.go 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. /*
  2. * Copyright 2019 Dgraph Labs, Inc. and Contributors
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. package ristretto
  17. import (
  18. "fmt"
  19. "math/rand"
  20. "time"
  21. )
  22. // cmSketch is a Count-Min sketch implementation with 4-bit counters, heavily
  23. // based on Damian Gryski's CM4 [1].
  24. //
  25. // [1]: https://github.com/dgryski/go-tinylfu/blob/master/cm4.go
  26. type cmSketch struct {
  27. rows [cmDepth]cmRow
  28. seed [cmDepth]uint64
  29. mask uint64
  30. }
  31. const (
  32. // cmDepth is the number of counter copies to store (think of it as rows).
  33. cmDepth = 4
  34. )
  35. func newCmSketch(numCounters int64) *cmSketch {
  36. if numCounters == 0 {
  37. panic("cmSketch: bad numCounters")
  38. }
  39. // Get the next power of 2 for better cache performance.
  40. numCounters = next2Power(numCounters)
  41. sketch := &cmSketch{mask: uint64(numCounters - 1)}
  42. // Initialize rows of counters and seeds.
  43. // Cryptographic precision not needed
  44. source := rand.New(rand.NewSource(time.Now().UnixNano())) //nolint:gosec
  45. for i := 0; i < cmDepth; i++ {
  46. sketch.seed[i] = source.Uint64()
  47. sketch.rows[i] = newCmRow(numCounters)
  48. }
  49. return sketch
  50. }
  51. // Increment increments the count(ers) for the specified key.
  52. func (s *cmSketch) Increment(hashed uint64) {
  53. for i := range s.rows {
  54. s.rows[i].increment((hashed ^ s.seed[i]) & s.mask)
  55. }
  56. }
  57. // Estimate returns the value of the specified key.
  58. func (s *cmSketch) Estimate(hashed uint64) int64 {
  59. min := byte(255)
  60. for i := range s.rows {
  61. val := s.rows[i].get((hashed ^ s.seed[i]) & s.mask)
  62. if val < min {
  63. min = val
  64. }
  65. }
  66. return int64(min)
  67. }
  68. // Reset halves all counter values.
  69. func (s *cmSketch) Reset() {
  70. for _, r := range s.rows {
  71. r.reset()
  72. }
  73. }
  74. // Clear zeroes all counters.
  75. func (s *cmSketch) Clear() {
  76. for _, r := range s.rows {
  77. r.clear()
  78. }
  79. }
  80. // cmRow is a row of bytes, with each byte holding two counters.
  81. type cmRow []byte
  82. func newCmRow(numCounters int64) cmRow {
  83. return make(cmRow, numCounters/2)
  84. }
  85. func (r cmRow) get(n uint64) byte {
  86. return (r[n/2] >> ((n & 1) * 4)) & 0x0f
  87. }
  88. func (r cmRow) increment(n uint64) {
  89. // Index of the counter.
  90. i := n / 2
  91. // Shift distance (even 0, odd 4).
  92. s := (n & 1) * 4
  93. // Counter value.
  94. v := (r[i] >> s) & 0x0f
  95. // Only increment if not max value (overflow wrap is bad for LFU).
  96. if v < 15 {
  97. r[i] += 1 << s
  98. }
  99. }
  100. func (r cmRow) reset() {
  101. // Halve each counter.
  102. for i := range r {
  103. r[i] = (r[i] >> 1) & 0x77
  104. }
  105. }
  106. func (r cmRow) clear() {
  107. // Zero each counter.
  108. for i := range r {
  109. r[i] = 0
  110. }
  111. }
  112. func (r cmRow) string() string {
  113. s := ""
  114. for i := uint64(0); i < uint64(len(r)*2); i++ {
  115. s += fmt.Sprintf("%02d ", (r[(i/2)]>>((i&1)*4))&0x0f)
  116. }
  117. s = s[:len(s)-1]
  118. return s
  119. }
  120. // next2Power rounds x up to the next power of 2, if it's not already one.
  121. func next2Power(x int64) int64 {
  122. x--
  123. x |= x >> 1
  124. x |= x >> 2
  125. x |= x >> 4
  126. x |= x >> 8
  127. x |= x >> 16
  128. x |= x >> 32
  129. x++
  130. return x
  131. }