fuzzy.go 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235
  1. /*
  2. Package fuzzy provides fuzzy string matching optimized
  3. for filenames and code symbols in the style of Sublime Text,
  4. VSCode, IntelliJ IDEA et al.
  5. */
  6. package fuzzy
  7. import (
  8. "sort"
  9. "unicode"
  10. "unicode/utf8"
  11. )
  12. // Match represents a matched string.
  13. type Match struct {
  14. // The matched string.
  15. Str string
  16. // The index of the matched string in the supplied slice.
  17. Index int
  18. // The indexes of matched characters. Useful for highlighting matches.
  19. MatchedIndexes []int
  20. // Score used to rank matches
  21. Score int
  22. }
  23. const (
  24. firstCharMatchBonus = 10
  25. matchFollowingSeparatorBonus = 20
  26. camelCaseMatchBonus = 20
  27. adjacentMatchBonus = 5
  28. unmatchedLeadingCharPenalty = -5
  29. maxUnmatchedLeadingCharPenalty = -15
  30. )
  31. var separators = []rune("/-_ .\\")
  32. // Matches is a slice of Match structs
  33. type Matches []Match
  34. func (a Matches) Len() int { return len(a) }
  35. func (a Matches) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
  36. func (a Matches) Less(i, j int) bool { return a[i].Score >= a[j].Score }
  37. // Source represents an abstract source of a list of strings. Source must be iterable type such as a slice.
  38. // The source will be iterated over till Len() with String(i) being called for each element where i is the
  39. // index of the element. You can find a working example in the README.
  40. type Source interface {
  41. // The string to be matched at position i.
  42. String(i int) string
  43. // The length of the source. Typically is the length of the slice of things that you want to match.
  44. Len() int
  45. }
  46. type stringSource []string
  47. func (ss stringSource) String(i int) string {
  48. return ss[i]
  49. }
  50. func (ss stringSource) Len() int { return len(ss) }
  51. /*
  52. Find looks up pattern in data and returns matches
  53. in descending order of match quality. Match quality
  54. is determined by a set of bonus and penalty rules.
  55. The following types of matches apply a bonus:
  56. * The first character in the pattern matches the first character in the match string.
  57. * The matched character is camel cased.
  58. * The matched character follows a separator such as an underscore character.
  59. * The matched character is adjacent to a previous match.
  60. Penalties are applied for every character in the search string that wasn't matched and all leading
  61. characters upto the first match.
  62. */
  63. func Find(pattern string, data []string) Matches {
  64. return FindFrom(pattern, stringSource(data))
  65. }
  66. /*
  67. FindFrom is an alternative implementation of Find using a Source
  68. instead of a list of strings.
  69. */
  70. func FindFrom(pattern string, data Source) Matches {
  71. if len(pattern) == 0 {
  72. return nil
  73. }
  74. runes := []rune(pattern)
  75. var matches Matches
  76. var matchedIndexes []int
  77. for i := 0; i < data.Len(); i++ {
  78. var match Match
  79. match.Str = data.String(i)
  80. match.Index = i
  81. if matchedIndexes != nil {
  82. match.MatchedIndexes = matchedIndexes
  83. } else {
  84. match.MatchedIndexes = make([]int, 0, len(runes))
  85. }
  86. var score int
  87. patternIndex := 0
  88. bestScore := -1
  89. matchedIndex := -1
  90. currAdjacentMatchBonus := 0
  91. var last rune
  92. var lastIndex int
  93. nextc, nextSize := utf8.DecodeRuneInString(data.String(i))
  94. var candidate rune
  95. var candidateSize int
  96. for j := 0; j < len(data.String(i)); j += candidateSize {
  97. candidate, candidateSize = nextc, nextSize
  98. if equalFold(candidate, runes[patternIndex]) {
  99. score = 0
  100. if j == 0 {
  101. score += firstCharMatchBonus
  102. }
  103. if unicode.IsLower(last) && unicode.IsUpper(candidate) {
  104. score += camelCaseMatchBonus
  105. }
  106. if j != 0 && isSeparator(last) {
  107. score += matchFollowingSeparatorBonus
  108. }
  109. if len(match.MatchedIndexes) > 0 {
  110. lastMatch := match.MatchedIndexes[len(match.MatchedIndexes)-1]
  111. bonus := adjacentCharBonus(lastIndex, lastMatch, currAdjacentMatchBonus)
  112. score += bonus
  113. // adjacent matches are incremental and keep increasing based on previous adjacent matches
  114. // thus we need to maintain the current match bonus
  115. currAdjacentMatchBonus += bonus
  116. }
  117. if score > bestScore {
  118. bestScore = score
  119. matchedIndex = j
  120. }
  121. }
  122. var nextp rune
  123. if patternIndex < len(runes)-1 {
  124. nextp = runes[patternIndex+1]
  125. }
  126. if j+candidateSize < len(data.String(i)) {
  127. if data.String(i)[j+candidateSize] < utf8.RuneSelf { // Fast path for ASCII
  128. nextc, nextSize = rune(data.String(i)[j+candidateSize]), 1
  129. } else {
  130. nextc, nextSize = utf8.DecodeRuneInString(data.String(i)[j+candidateSize:])
  131. }
  132. } else {
  133. nextc, nextSize = 0, 0
  134. }
  135. // We apply the best score when we have the next match coming up or when the search string has ended.
  136. // Tracking when the next match is coming up allows us to exhaustively find the best match and not necessarily
  137. // the first match.
  138. // For example given the pattern "tk" and search string "The Black Knight", exhaustively matching allows us
  139. // to match the second k thus giving this string a higher score.
  140. if equalFold(nextp, nextc) || nextc == 0 {
  141. if matchedIndex > -1 {
  142. if len(match.MatchedIndexes) == 0 {
  143. penalty := matchedIndex * unmatchedLeadingCharPenalty
  144. bestScore += max(penalty, maxUnmatchedLeadingCharPenalty)
  145. }
  146. match.Score += bestScore
  147. match.MatchedIndexes = append(match.MatchedIndexes, matchedIndex)
  148. score = 0
  149. bestScore = -1
  150. patternIndex++
  151. }
  152. }
  153. lastIndex = j
  154. last = candidate
  155. }
  156. // apply penalty for each unmatched character
  157. penalty := len(match.MatchedIndexes) - len(data.String(i))
  158. match.Score += penalty
  159. if len(match.MatchedIndexes) == len(runes) {
  160. matches = append(matches, match)
  161. matchedIndexes = nil
  162. } else {
  163. matchedIndexes = match.MatchedIndexes[:0] // Recycle match index slice
  164. }
  165. }
  166. sort.Stable(matches)
  167. return matches
  168. }
  169. // Taken from strings.EqualFold
  170. func equalFold(tr, sr rune) bool {
  171. if tr == sr {
  172. return true
  173. }
  174. if tr < sr {
  175. tr, sr = sr, tr
  176. }
  177. // Fast check for ASCII.
  178. if tr < utf8.RuneSelf {
  179. // ASCII, and sr is upper case. tr must be lower case.
  180. if 'A' <= sr && sr <= 'Z' && tr == sr+'a'-'A' {
  181. return true
  182. }
  183. return false
  184. }
  185. // General case. SimpleFold(x) returns the next equivalent rune > x
  186. // or wraps around to smaller values.
  187. r := unicode.SimpleFold(sr)
  188. for r != sr && r < tr {
  189. r = unicode.SimpleFold(r)
  190. }
  191. return r == tr
  192. }
  193. func adjacentCharBonus(i int, lastMatch int, currentBonus int) int {
  194. if lastMatch == i {
  195. return currentBonus*2 + adjacentMatchBonus
  196. }
  197. return 0
  198. }
  199. func isSeparator(s rune) bool {
  200. for _, sep := range separators {
  201. if s == sep {
  202. return true
  203. }
  204. }
  205. return false
  206. }
  207. func max(x int, y int) int {
  208. if x > y {
  209. return x
  210. }
  211. return y
  212. }