| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170 |
- // Copyright 2013 The LevelDB-Go Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package y
- import "math"
- // Filter is an encoded set of []byte keys.
- type Filter []byte
- func (f Filter) MayContainKey(k []byte) bool {
- return f.MayContain(Hash(k))
- }
- // MayContain returns whether the filter may contain given key. False positives
- // are possible, where it returns true for keys not in the original set.
- func (f Filter) MayContain(h uint32) bool {
- if len(f) < 2 {
- return false
- }
- k := f[len(f)-1]
- if k > 30 {
- // This is reserved for potentially new encodings for short Bloom filters.
- // Consider it a match.
- return true
- }
- nBits := uint32(8 * (len(f) - 1))
- delta := h>>17 | h<<15
- for j := uint8(0); j < k; j++ {
- bitPos := h % nBits
- if f[bitPos/8]&(1<<(bitPos%8)) == 0 {
- return false
- }
- h += delta
- }
- return true
- }
- // NewFilter returns a new Bloom filter that encodes a set of []byte keys with
- // the given number of bits per key, approximately.
- //
- // A good bitsPerKey value is 10, which yields a filter with ~ 1% false
- // positive rate.
- func NewFilter(keys []uint32, bitsPerKey int) Filter {
- return Filter(appendFilter(nil, keys, bitsPerKey))
- }
- // BloomBitsPerKey returns the bits per key required by bloomfilter based on
- // the false positive rate.
- func BloomBitsPerKey(numEntries int, fp float64) int {
- size := -1 * float64(numEntries) * math.Log(fp) / math.Pow(float64(0.69314718056), 2)
- locs := math.Ceil(float64(0.69314718056) * size / float64(numEntries))
- return int(locs)
- }
- func appendFilter(buf []byte, keys []uint32, bitsPerKey int) []byte {
- if bitsPerKey < 0 {
- bitsPerKey = 0
- }
- // 0.69 is approximately ln(2).
- k := uint32(float64(bitsPerKey) * 0.69)
- if k < 1 {
- k = 1
- }
- if k > 30 {
- k = 30
- }
- nBits := len(keys) * bitsPerKey
- // For small len(keys), we can see a very high false positive rate. Fix it
- // by enforcing a minimum bloom filter length.
- if nBits < 64 {
- nBits = 64
- }
- nBytes := (nBits + 7) / 8
- nBits = nBytes * 8
- buf, filter := extend(buf, nBytes+1)
- for _, h := range keys {
- delta := h>>17 | h<<15
- for j := uint32(0); j < k; j++ {
- bitPos := h % uint32(nBits)
- filter[bitPos/8] |= 1 << (bitPos % 8)
- h += delta
- }
- }
- filter[nBytes] = uint8(k)
- return buf
- }
- // extend appends n zero bytes to b. It returns the overall slice (of length
- // n+len(originalB)) and the slice of n trailing zeroes.
- func extend(b []byte, n int) (overall, trailer []byte) {
- want := n + len(b)
- if want <= cap(b) {
- overall = b[:want]
- trailer = overall[len(b):]
- for i := range trailer {
- trailer[i] = 0
- }
- } else {
- // Grow the capacity exponentially, with a 1KiB minimum.
- c := 1024
- for c < want {
- c += c / 4
- }
- overall = make([]byte, want, c)
- trailer = overall[len(b):]
- copy(overall, b)
- }
- return overall, trailer
- }
- // hash implements a hashing algorithm similar to the Murmur hash.
- func Hash(b []byte) uint32 {
- const (
- seed = 0xbc9f1d34
- m = 0xc6a4a793
- )
- h := uint32(seed) ^ uint32(len(b))*m
- for ; len(b) >= 4; b = b[4:] {
- h += uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
- h *= m
- h ^= h >> 16
- }
- switch len(b) {
- case 3:
- h += uint32(b[2]) << 16
- fallthrough
- case 2:
- h += uint32(b[1]) << 8
- fallthrough
- case 1:
- h += uint32(b[0])
- h *= m
- h ^= h >> 24
- }
- return h
- }
- // FilterPolicy implements the db.FilterPolicy interface from the leveldb/db
- // package.
- //
- // The integer value is the approximate number of bits used per key. A good
- // value is 10, which yields a filter with ~ 1% false positive rate.
- //
- // It is valid to use the other API in this package (leveldb/bloom) without
- // using this type or the leveldb/db package.
- // type FilterPolicy int
- // // Name implements the db.FilterPolicy interface.
- // func (p FilterPolicy) Name() string {
- // // This string looks arbitrary, but its value is written to LevelDB .ldb
- // // files, and should be this exact value to be compatible with those files
- // // and with the C++ LevelDB code.
- // return "leveldb.BuiltinBloomFilter2"
- // }
- // // AppendFilter implements the db.FilterPolicy interface.
- // func (p FilterPolicy) AppendFilter(dst []byte, keys [][]byte) []byte {
- // return appendFilter(dst, keys, int(p))
- // }
- // // MayContain implements the db.FilterPolicy interface.
- // func (p FilterPolicy) MayContain(filter, key []byte) bool {
- // return Filter(filter).MayContain(key)
- // }
|