histogram.go 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205
  1. /*
  2. * Copyright 2020 Dgraph Labs, Inc. and Contributors
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. package z
  17. import (
  18. "fmt"
  19. "math"
  20. "strings"
  21. "github.com/dustin/go-humanize"
  22. )
  23. // Creates bounds for an histogram. The bounds are powers of two of the form
  24. // [2^min_exponent, ..., 2^max_exponent].
  25. func HistogramBounds(minExponent, maxExponent uint32) []float64 {
  26. var bounds []float64
  27. for i := minExponent; i <= maxExponent; i++ {
  28. bounds = append(bounds, float64(int(1)<<i))
  29. }
  30. return bounds
  31. }
  32. func Fibonacci(num int) []float64 {
  33. assert(num > 4)
  34. bounds := make([]float64, num)
  35. bounds[0] = 1
  36. bounds[1] = 2
  37. for i := 2; i < num; i++ {
  38. bounds[i] = bounds[i-1] + bounds[i-2]
  39. }
  40. return bounds
  41. }
  42. // HistogramData stores the information needed to represent the sizes of the keys and values
  43. // as a histogram.
  44. type HistogramData struct {
  45. Bounds []float64
  46. Count int64
  47. CountPerBucket []int64
  48. Min int64
  49. Max int64
  50. Sum int64
  51. }
  52. // NewHistogramData returns a new instance of HistogramData with properly initialized fields.
  53. func NewHistogramData(bounds []float64) *HistogramData {
  54. return &HistogramData{
  55. Bounds: bounds,
  56. CountPerBucket: make([]int64, len(bounds)+1),
  57. Max: 0,
  58. Min: math.MaxInt64,
  59. }
  60. }
  61. func (histogram *HistogramData) Copy() *HistogramData {
  62. if histogram == nil {
  63. return nil
  64. }
  65. return &HistogramData{
  66. Bounds: append([]float64{}, histogram.Bounds...),
  67. CountPerBucket: append([]int64{}, histogram.CountPerBucket...),
  68. Count: histogram.Count,
  69. Min: histogram.Min,
  70. Max: histogram.Max,
  71. Sum: histogram.Sum,
  72. }
  73. }
  74. // Update changes the Min and Max fields if value is less than or greater than the current values.
  75. func (histogram *HistogramData) Update(value int64) {
  76. if histogram == nil {
  77. return
  78. }
  79. if value > histogram.Max {
  80. histogram.Max = value
  81. }
  82. if value < histogram.Min {
  83. histogram.Min = value
  84. }
  85. histogram.Sum += value
  86. histogram.Count++
  87. for index := 0; index <= len(histogram.Bounds); index++ {
  88. // Allocate value in the last buckets if we reached the end of the Bounds array.
  89. if index == len(histogram.Bounds) {
  90. histogram.CountPerBucket[index]++
  91. break
  92. }
  93. if value < int64(histogram.Bounds[index]) {
  94. histogram.CountPerBucket[index]++
  95. break
  96. }
  97. }
  98. }
  99. // Mean returns the mean value for the histogram.
  100. func (histogram *HistogramData) Mean() float64 {
  101. if histogram.Count == 0 {
  102. return 0
  103. }
  104. return float64(histogram.Sum) / float64(histogram.Count)
  105. }
  106. // String converts the histogram data into human-readable string.
  107. func (histogram *HistogramData) String() string {
  108. if histogram == nil {
  109. return ""
  110. }
  111. var b strings.Builder
  112. b.WriteString("\n -- Histogram: \n")
  113. b.WriteString(fmt.Sprintf("Min value: %d \n", histogram.Min))
  114. b.WriteString(fmt.Sprintf("Max value: %d \n", histogram.Max))
  115. b.WriteString(fmt.Sprintf("Count: %d \n", histogram.Count))
  116. b.WriteString(fmt.Sprintf("50p: %.2f \n", histogram.Percentile(0.5)))
  117. b.WriteString(fmt.Sprintf("75p: %.2f \n", histogram.Percentile(0.75)))
  118. b.WriteString(fmt.Sprintf("90p: %.2f \n", histogram.Percentile(0.90)))
  119. numBounds := len(histogram.Bounds)
  120. var cum float64
  121. for index, count := range histogram.CountPerBucket {
  122. if count == 0 {
  123. continue
  124. }
  125. // The last bucket represents the bucket that contains the range from
  126. // the last bound up to infinity so it's processed differently than the
  127. // other buckets.
  128. if index == len(histogram.CountPerBucket)-1 {
  129. lowerBound := uint64(histogram.Bounds[numBounds-1])
  130. page := float64(count*100) / float64(histogram.Count)
  131. cum += page
  132. b.WriteString(fmt.Sprintf("[%s, %s) %d %.2f%% %.2f%%\n",
  133. humanize.IBytes(lowerBound), "infinity", count, page, cum))
  134. continue
  135. }
  136. upperBound := uint64(histogram.Bounds[index])
  137. lowerBound := uint64(0)
  138. if index > 0 {
  139. lowerBound = uint64(histogram.Bounds[index-1])
  140. }
  141. page := float64(count*100) / float64(histogram.Count)
  142. cum += page
  143. b.WriteString(fmt.Sprintf("[%d, %d) %d %.2f%% %.2f%%\n",
  144. lowerBound, upperBound, count, page, cum))
  145. }
  146. b.WriteString(" --\n")
  147. return b.String()
  148. }
  149. // Percentile returns the percentile value for the histogram.
  150. // value of p should be between [0.0-1.0]
  151. func (histogram *HistogramData) Percentile(p float64) float64 {
  152. if histogram == nil {
  153. return 0
  154. }
  155. if histogram.Count == 0 {
  156. // if no data return the minimum range
  157. return histogram.Bounds[0]
  158. }
  159. pval := int64(float64(histogram.Count) * p)
  160. for i, v := range histogram.CountPerBucket {
  161. pval = pval - v
  162. if pval <= 0 {
  163. if i == len(histogram.Bounds) {
  164. break
  165. }
  166. return histogram.Bounds[i]
  167. }
  168. }
  169. // default return should be the max range
  170. return histogram.Bounds[len(histogram.Bounds)-1]
  171. }
  172. // Clear reset the histogram. Helpful in situations where we need to reset the metrics
  173. func (histogram *HistogramData) Clear() {
  174. if histogram == nil {
  175. return
  176. }
  177. histogram.Count = 0
  178. histogram.CountPerBucket = make([]int64, len(histogram.Bounds)+1)
  179. histogram.Sum = 0
  180. histogram.Max = 0
  181. histogram.Min = math.MaxInt64
  182. }