bbloom.go 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210
  1. // The MIT License (MIT)
  2. // Copyright (c) 2014 Andreas Briese, eduToolbox@Bri-C GmbH, Sarstedt
  3. // Permission is hereby granted, free of charge, to any person obtaining a copy of
  4. // this software and associated documentation files (the "Software"), to deal in
  5. // the Software without restriction, including without limitation the rights to
  6. // use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
  7. // the Software, and to permit persons to whom the Software is furnished to do so,
  8. // subject to the following conditions:
  9. // The above copyright notice and this permission notice shall be included in all
  10. // copies or substantial portions of the Software.
  11. // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  12. // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
  13. // FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
  14. // COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
  15. // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  16. // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  17. package z
  18. import (
  19. "bytes"
  20. "encoding/json"
  21. "log"
  22. "math"
  23. "unsafe"
  24. )
  25. // helper
  26. var mask = []uint8{1, 2, 4, 8, 16, 32, 64, 128}
  27. func getSize(ui64 uint64) (size uint64, exponent uint64) {
  28. if ui64 < uint64(512) {
  29. ui64 = uint64(512)
  30. }
  31. size = uint64(1)
  32. for size < ui64 {
  33. size <<= 1
  34. exponent++
  35. }
  36. return size, exponent
  37. }
  38. func calcSizeByWrongPositives(numEntries, wrongs float64) (uint64, uint64) {
  39. size := -1 * numEntries * math.Log(wrongs) / math.Pow(float64(0.69314718056), 2)
  40. locs := math.Ceil(float64(0.69314718056) * size / numEntries)
  41. return uint64(size), uint64(locs)
  42. }
  43. // NewBloomFilter returns a new bloomfilter.
  44. func NewBloomFilter(params ...float64) (bloomfilter *Bloom) {
  45. var entries, locs uint64
  46. if len(params) == 2 {
  47. if params[1] < 1 {
  48. entries, locs = calcSizeByWrongPositives(params[0], params[1])
  49. } else {
  50. entries, locs = uint64(params[0]), uint64(params[1])
  51. }
  52. } else {
  53. log.Fatal("usage: New(float64(number_of_entries), float64(number_of_hashlocations))" +
  54. " i.e. New(float64(1000), float64(3)) or New(float64(number_of_entries)," +
  55. " float64(number_of_hashlocations)) i.e. New(float64(1000), float64(0.03))")
  56. }
  57. size, exponent := getSize(entries)
  58. bloomfilter = &Bloom{
  59. sizeExp: exponent,
  60. size: size - 1,
  61. setLocs: locs,
  62. shift: 64 - exponent,
  63. }
  64. bloomfilter.Size(size)
  65. return bloomfilter
  66. }
  67. // Bloom filter
  68. type Bloom struct {
  69. bitset []uint64
  70. ElemNum uint64
  71. sizeExp uint64
  72. size uint64
  73. setLocs uint64
  74. shift uint64
  75. }
  76. // <--- http://www.cse.yorku.ca/~oz/hash.html
  77. // modified Berkeley DB Hash (32bit)
  78. // hash is casted to l, h = 16bit fragments
  79. // func (bl Bloom) absdbm(b *[]byte) (l, h uint64) {
  80. // hash := uint64(len(*b))
  81. // for _, c := range *b {
  82. // hash = uint64(c) + (hash << 6) + (hash << bl.sizeExp) - hash
  83. // }
  84. // h = hash >> bl.shift
  85. // l = hash << bl.shift >> bl.shift
  86. // return l, h
  87. // }
  88. // Add adds hash of a key to the bloomfilter.
  89. func (bl *Bloom) Add(hash uint64) {
  90. h := hash >> bl.shift
  91. l := hash << bl.shift >> bl.shift
  92. for i := uint64(0); i < bl.setLocs; i++ {
  93. bl.Set((h + i*l) & bl.size)
  94. bl.ElemNum++
  95. }
  96. }
  97. // Has checks if bit(s) for entry hash is/are set,
  98. // returns true if the hash was added to the Bloom Filter.
  99. func (bl Bloom) Has(hash uint64) bool {
  100. h := hash >> bl.shift
  101. l := hash << bl.shift >> bl.shift
  102. for i := uint64(0); i < bl.setLocs; i++ {
  103. if !bl.IsSet((h + i*l) & bl.size) {
  104. return false
  105. }
  106. }
  107. return true
  108. }
  109. // AddIfNotHas only Adds hash, if it's not present in the bloomfilter.
  110. // Returns true if hash was added.
  111. // Returns false if hash was already registered in the bloomfilter.
  112. func (bl *Bloom) AddIfNotHas(hash uint64) bool {
  113. if bl.Has(hash) {
  114. return false
  115. }
  116. bl.Add(hash)
  117. return true
  118. }
  119. // TotalSize returns the total size of the bloom filter.
  120. func (bl *Bloom) TotalSize() int {
  121. // The bl struct has 5 members and each one is 8 byte. The bitset is a
  122. // uint64 byte slice.
  123. return len(bl.bitset)*8 + 5*8
  124. }
  125. // Size makes Bloom filter with as bitset of size sz.
  126. func (bl *Bloom) Size(sz uint64) {
  127. bl.bitset = make([]uint64, sz>>6)
  128. }
  129. // Clear resets the Bloom filter.
  130. func (bl *Bloom) Clear() {
  131. for i := range bl.bitset {
  132. bl.bitset[i] = 0
  133. }
  134. }
  135. // Set sets the bit[idx] of bitset.
  136. func (bl *Bloom) Set(idx uint64) {
  137. ptr := unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[idx>>6])) + uintptr((idx%64)>>3))
  138. *(*uint8)(ptr) |= mask[idx%8]
  139. }
  140. // IsSet checks if bit[idx] of bitset is set, returns true/false.
  141. func (bl *Bloom) IsSet(idx uint64) bool {
  142. ptr := unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[idx>>6])) + uintptr((idx%64)>>3))
  143. r := ((*(*uint8)(ptr)) >> (idx % 8)) & 1
  144. return r == 1
  145. }
  146. // bloomJSONImExport
  147. // Im/Export structure used by JSONMarshal / JSONUnmarshal
  148. type bloomJSONImExport struct {
  149. FilterSet []byte
  150. SetLocs uint64
  151. }
  152. // NewWithBoolset takes a []byte slice and number of locs per entry,
  153. // returns the bloomfilter with a bitset populated according to the input []byte.
  154. func newWithBoolset(bs *[]byte, locs uint64) *Bloom {
  155. bloomfilter := NewBloomFilter(float64(len(*bs)<<3), float64(locs))
  156. for i, b := range *bs {
  157. *(*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(&bloomfilter.bitset[0])) + uintptr(i))) = b
  158. }
  159. return bloomfilter
  160. }
  161. // JSONUnmarshal takes JSON-Object (type bloomJSONImExport) as []bytes
  162. // returns bloom32 / bloom64 object.
  163. func JSONUnmarshal(dbData []byte) (*Bloom, error) {
  164. bloomImEx := bloomJSONImExport{}
  165. if err := json.Unmarshal(dbData, &bloomImEx); err != nil {
  166. return nil, err
  167. }
  168. buf := bytes.NewBuffer(bloomImEx.FilterSet)
  169. bs := buf.Bytes()
  170. bf := newWithBoolset(&bs, bloomImEx.SetLocs)
  171. return bf, nil
  172. }
  173. // JSONMarshal returns JSON-object (type bloomJSONImExport) as []byte.
  174. func (bl Bloom) JSONMarshal() []byte {
  175. bloomImEx := bloomJSONImExport{}
  176. bloomImEx.SetLocs = bl.setLocs
  177. bloomImEx.FilterSet = make([]byte, len(bl.bitset)<<3)
  178. for i := range bloomImEx.FilterSet {
  179. bloomImEx.FilterSet[i] = *(*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(&bl.bitset[0])) +
  180. uintptr(i)))
  181. }
  182. data, err := json.Marshal(bloomImEx)
  183. if err != nil {
  184. log.Fatal("json.Marshal failed: ", err)
  185. }
  186. return data
  187. }