histogram.go 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. /*
  2. * SPDX-FileCopyrightText: © Hypermode Inc. <hello@hypermode.com>
  3. * SPDX-License-Identifier: Apache-2.0
  4. */
  5. package badger
  6. import (
  7. "fmt"
  8. "math"
  9. )
  10. // PrintHistogram builds and displays the key-value size histogram.
  11. // When keyPrefix is set, only the keys that have prefix "keyPrefix" are
  12. // considered for creating the histogram
  13. func (db *DB) PrintHistogram(keyPrefix []byte) {
  14. if db == nil {
  15. fmt.Println("\nCannot build histogram: DB is nil.")
  16. return
  17. }
  18. histogram := db.buildHistogram(keyPrefix)
  19. fmt.Printf("Histogram of key sizes (in bytes)\n")
  20. histogram.keySizeHistogram.printHistogram()
  21. fmt.Printf("Histogram of value sizes (in bytes)\n")
  22. histogram.valueSizeHistogram.printHistogram()
  23. }
  24. // histogramData stores information about a histogram
  25. type histogramData struct {
  26. bins []int64
  27. countPerBin []int64
  28. totalCount int64
  29. min int64
  30. max int64
  31. sum int64
  32. }
  33. // sizeHistogram contains keySize histogram and valueSize histogram
  34. type sizeHistogram struct {
  35. keySizeHistogram, valueSizeHistogram histogramData
  36. }
  37. // newSizeHistogram returns a new instance of keyValueSizeHistogram with
  38. // properly initialized fields.
  39. func newSizeHistogram() *sizeHistogram {
  40. // TODO(ibrahim): find appropriate bin size.
  41. keyBins := createHistogramBins(1, 16)
  42. valueBins := createHistogramBins(1, 30)
  43. return &sizeHistogram{
  44. keySizeHistogram: histogramData{
  45. bins: keyBins,
  46. countPerBin: make([]int64, len(keyBins)+1),
  47. max: math.MinInt64,
  48. min: math.MaxInt64,
  49. sum: 0,
  50. },
  51. valueSizeHistogram: histogramData{
  52. bins: valueBins,
  53. countPerBin: make([]int64, len(valueBins)+1),
  54. max: math.MinInt64,
  55. min: math.MaxInt64,
  56. sum: 0,
  57. },
  58. }
  59. }
  60. // createHistogramBins creates bins for an histogram. The bin sizes are powers
  61. // of two of the form [2^min_exponent, ..., 2^max_exponent].
  62. func createHistogramBins(minExponent, maxExponent uint32) []int64 {
  63. var bins []int64
  64. for i := minExponent; i <= maxExponent; i++ {
  65. bins = append(bins, int64(1)<<i)
  66. }
  67. return bins
  68. }
  69. // Update the min and max fields if value is less than or greater than the
  70. // current min/max value.
  71. func (histogram *histogramData) Update(value int64) {
  72. if value > histogram.max {
  73. histogram.max = value
  74. }
  75. if value < histogram.min {
  76. histogram.min = value
  77. }
  78. histogram.sum += value
  79. histogram.totalCount++
  80. for index := 0; index <= len(histogram.bins); index++ {
  81. // Allocate value in the last buckets if we reached the end of the Bounds array.
  82. if index == len(histogram.bins) {
  83. histogram.countPerBin[index]++
  84. break
  85. }
  86. // Check if the value should be added to the "index" bin
  87. if value < histogram.bins[index] {
  88. histogram.countPerBin[index]++
  89. break
  90. }
  91. }
  92. }
  93. // buildHistogram builds the key-value size histogram.
  94. // When keyPrefix is set, only the keys that have prefix "keyPrefix" are
  95. // considered for creating the histogram
  96. func (db *DB) buildHistogram(keyPrefix []byte) *sizeHistogram {
  97. txn := db.NewTransaction(false)
  98. defer txn.Discard()
  99. itr := txn.NewIterator(DefaultIteratorOptions)
  100. defer itr.Close()
  101. badgerHistogram := newSizeHistogram()
  102. // Collect key and value sizes.
  103. for itr.Seek(keyPrefix); itr.ValidForPrefix(keyPrefix); itr.Next() {
  104. item := itr.Item()
  105. badgerHistogram.keySizeHistogram.Update(item.KeySize())
  106. badgerHistogram.valueSizeHistogram.Update(item.ValueSize())
  107. }
  108. return badgerHistogram
  109. }
  110. // printHistogram prints the histogram data in a human-readable format.
  111. func (histogram histogramData) printHistogram() {
  112. fmt.Printf("Total count: %d\n", histogram.totalCount)
  113. fmt.Printf("Min value: %d\n", histogram.min)
  114. fmt.Printf("Max value: %d\n", histogram.max)
  115. fmt.Printf("Mean: %.2f\n", float64(histogram.sum)/float64(histogram.totalCount))
  116. fmt.Printf("%24s %9s\n", "Range", "Count")
  117. numBins := len(histogram.bins)
  118. for index, count := range histogram.countPerBin {
  119. if count == 0 {
  120. continue
  121. }
  122. // The last bin represents the bin that contains the range from
  123. // the last bin up to infinity so it's processed differently than the
  124. // other bins.
  125. if index == len(histogram.countPerBin)-1 {
  126. lowerBound := int(histogram.bins[numBins-1])
  127. fmt.Printf("[%10d, %10s) %9d\n", lowerBound, "infinity", count)
  128. continue
  129. }
  130. upperBound := int(histogram.bins[index])
  131. lowerBound := 0
  132. if index > 0 {
  133. lowerBound = int(histogram.bins[index-1])
  134. }
  135. fmt.Printf("[%10d, %10d) %9d\n", lowerBound, upperBound, count)
  136. }
  137. fmt.Println()
  138. }