histogram.go 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169
  1. /*
  2. * Copyright 2019 Dgraph Labs, Inc. and Contributors
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. package badger
  17. import (
  18. "fmt"
  19. "math"
  20. )
  21. // PrintHistogram builds and displays the key-value size histogram.
  22. // When keyPrefix is set, only the keys that have prefix "keyPrefix" are
  23. // considered for creating the histogram
  24. func (db *DB) PrintHistogram(keyPrefix []byte) {
  25. if db == nil {
  26. fmt.Println("\nCannot build histogram: DB is nil.")
  27. return
  28. }
  29. histogram := db.buildHistogram(keyPrefix)
  30. fmt.Printf("Histogram of key sizes (in bytes)\n")
  31. histogram.keySizeHistogram.printHistogram()
  32. fmt.Printf("Histogram of value sizes (in bytes)\n")
  33. histogram.valueSizeHistogram.printHistogram()
  34. }
  35. // histogramData stores information about a histogram
  36. type histogramData struct {
  37. bins []int64
  38. countPerBin []int64
  39. totalCount int64
  40. min int64
  41. max int64
  42. sum int64
  43. }
  44. // sizeHistogram contains keySize histogram and valueSize histogram
  45. type sizeHistogram struct {
  46. keySizeHistogram, valueSizeHistogram histogramData
  47. }
  48. // newSizeHistogram returns a new instance of keyValueSizeHistogram with
  49. // properly initialized fields.
  50. func newSizeHistogram() *sizeHistogram {
  51. // TODO(ibrahim): find appropriate bin size.
  52. keyBins := createHistogramBins(1, 16)
  53. valueBins := createHistogramBins(1, 30)
  54. return &sizeHistogram{
  55. keySizeHistogram: histogramData{
  56. bins: keyBins,
  57. countPerBin: make([]int64, len(keyBins)+1),
  58. max: math.MinInt64,
  59. min: math.MaxInt64,
  60. sum: 0,
  61. },
  62. valueSizeHistogram: histogramData{
  63. bins: valueBins,
  64. countPerBin: make([]int64, len(valueBins)+1),
  65. max: math.MinInt64,
  66. min: math.MaxInt64,
  67. sum: 0,
  68. },
  69. }
  70. }
  71. // createHistogramBins creates bins for an histogram. The bin sizes are powers
  72. // of two of the form [2^min_exponent, ..., 2^max_exponent].
  73. func createHistogramBins(minExponent, maxExponent uint32) []int64 {
  74. var bins []int64
  75. for i := minExponent; i <= maxExponent; i++ {
  76. bins = append(bins, int64(1)<<i)
  77. }
  78. return bins
  79. }
  80. // Update the min and max fields if value is less than or greater than the
  81. // current min/max value.
  82. func (histogram *histogramData) Update(value int64) {
  83. if value > histogram.max {
  84. histogram.max = value
  85. }
  86. if value < histogram.min {
  87. histogram.min = value
  88. }
  89. histogram.sum += value
  90. histogram.totalCount++
  91. for index := 0; index <= len(histogram.bins); index++ {
  92. // Allocate value in the last buckets if we reached the end of the Bounds array.
  93. if index == len(histogram.bins) {
  94. histogram.countPerBin[index]++
  95. break
  96. }
  97. // Check if the value should be added to the "index" bin
  98. if value < histogram.bins[index] {
  99. histogram.countPerBin[index]++
  100. break
  101. }
  102. }
  103. }
  104. // buildHistogram builds the key-value size histogram.
  105. // When keyPrefix is set, only the keys that have prefix "keyPrefix" are
  106. // considered for creating the histogram
  107. func (db *DB) buildHistogram(keyPrefix []byte) *sizeHistogram {
  108. txn := db.NewTransaction(false)
  109. defer txn.Discard()
  110. itr := txn.NewIterator(DefaultIteratorOptions)
  111. defer itr.Close()
  112. badgerHistogram := newSizeHistogram()
  113. // Collect key and value sizes.
  114. for itr.Seek(keyPrefix); itr.ValidForPrefix(keyPrefix); itr.Next() {
  115. item := itr.Item()
  116. badgerHistogram.keySizeHistogram.Update(item.KeySize())
  117. badgerHistogram.valueSizeHistogram.Update(item.ValueSize())
  118. }
  119. return badgerHistogram
  120. }
  121. // printHistogram prints the histogram data in a human-readable format.
  122. func (histogram histogramData) printHistogram() {
  123. fmt.Printf("Total count: %d\n", histogram.totalCount)
  124. fmt.Printf("Min value: %d\n", histogram.min)
  125. fmt.Printf("Max value: %d\n", histogram.max)
  126. fmt.Printf("Mean: %.2f\n", float64(histogram.sum)/float64(histogram.totalCount))
  127. fmt.Printf("%24s %9s\n", "Range", "Count")
  128. numBins := len(histogram.bins)
  129. for index, count := range histogram.countPerBin {
  130. if count == 0 {
  131. continue
  132. }
  133. // The last bin represents the bin that contains the range from
  134. // the last bin up to infinity so it's processed differently than the
  135. // other bins.
  136. if index == len(histogram.countPerBin)-1 {
  137. lowerBound := int(histogram.bins[numBins-1])
  138. fmt.Printf("[%10d, %10s) %9d\n", lowerBound, "infinity", count)
  139. continue
  140. }
  141. upperBound := int(histogram.bins[index])
  142. lowerBound := 0
  143. if index > 0 {
  144. lowerBound = int(histogram.bins[index-1])
  145. }
  146. fmt.Printf("[%10d, %10d) %9d\n", lowerBound, upperBound, count)
  147. }
  148. fmt.Println()
  149. }