truncate.go 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282
  1. package ansi
  2. import (
  3. "bytes"
  4. "github.com/charmbracelet/x/ansi/parser"
  5. "github.com/mattn/go-runewidth"
  6. "github.com/rivo/uniseg"
  7. )
  8. // Cut the string, without adding any prefix or tail strings. This function is
  9. // aware of ANSI escape codes and will not break them, and accounts for
  10. // wide-characters (such as East-Asian characters and emojis). Note that the
  11. // [left] parameter is inclusive, while [right] isn't.
  12. // This treats the text as a sequence of graphemes.
  13. func Cut(s string, left, right int) string {
  14. return cut(GraphemeWidth, s, left, right)
  15. }
  16. // CutWc the string, without adding any prefix or tail strings. This function is
  17. // aware of ANSI escape codes and will not break them, and accounts for
  18. // wide-characters (such as East-Asian characters and emojis). Note that the
  19. // [left] parameter is inclusive, while [right] isn't.
  20. // This treats the text as a sequence of wide characters and runes.
  21. func CutWc(s string, left, right int) string {
  22. return cut(WcWidth, s, left, right)
  23. }
  24. func cut(m Method, s string, left, right int) string {
  25. if right <= left {
  26. return ""
  27. }
  28. truncate := Truncate
  29. truncateLeft := TruncateLeft
  30. if m == WcWidth {
  31. truncate = TruncateWc
  32. truncateLeft = TruncateWc
  33. }
  34. if left == 0 {
  35. return truncate(s, right, "")
  36. }
  37. return truncateLeft(Truncate(s, right, ""), left, "")
  38. }
  39. // Truncate truncates a string to a given length, adding a tail to the end if
  40. // the string is longer than the given length. This function is aware of ANSI
  41. // escape codes and will not break them, and accounts for wide-characters (such
  42. // as East-Asian characters and emojis).
  43. // This treats the text as a sequence of graphemes.
  44. func Truncate(s string, length int, tail string) string {
  45. return truncate(GraphemeWidth, s, length, tail)
  46. }
  47. // TruncateWc truncates a string to a given length, adding a tail to the end if
  48. // the string is longer than the given length. This function is aware of ANSI
  49. // escape codes and will not break them, and accounts for wide-characters (such
  50. // as East-Asian characters and emojis).
  51. // This treats the text as a sequence of wide characters and runes.
  52. func TruncateWc(s string, length int, tail string) string {
  53. return truncate(WcWidth, s, length, tail)
  54. }
  55. func truncate(m Method, s string, length int, tail string) string {
  56. if sw := StringWidth(s); sw <= length {
  57. return s
  58. }
  59. tw := StringWidth(tail)
  60. length -= tw
  61. if length < 0 {
  62. return ""
  63. }
  64. var cluster []byte
  65. var buf bytes.Buffer
  66. curWidth := 0
  67. ignoring := false
  68. pstate := parser.GroundState // initial state
  69. b := []byte(s)
  70. i := 0
  71. // Here we iterate over the bytes of the string and collect printable
  72. // characters and runes. We also keep track of the width of the string
  73. // in cells.
  74. //
  75. // Once we reach the given length, we start ignoring characters and only
  76. // collect ANSI escape codes until we reach the end of string.
  77. for i < len(b) {
  78. state, action := parser.Table.Transition(pstate, b[i])
  79. if state == parser.Utf8State {
  80. // This action happens when we transition to the Utf8State.
  81. var width int
  82. cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1)
  83. if m == WcWidth {
  84. width = runewidth.StringWidth(string(cluster))
  85. }
  86. // increment the index by the length of the cluster
  87. i += len(cluster)
  88. // Are we ignoring? Skip to the next byte
  89. if ignoring {
  90. continue
  91. }
  92. // Is this gonna be too wide?
  93. // If so write the tail and stop collecting.
  94. if curWidth+width > length && !ignoring {
  95. ignoring = true
  96. buf.WriteString(tail)
  97. }
  98. if curWidth+width > length {
  99. continue
  100. }
  101. curWidth += width
  102. buf.Write(cluster)
  103. // Done collecting, now we're back in the ground state.
  104. pstate = parser.GroundState
  105. continue
  106. }
  107. switch action {
  108. case parser.PrintAction:
  109. // Is this gonna be too wide?
  110. // If so write the tail and stop collecting.
  111. if curWidth >= length && !ignoring {
  112. ignoring = true
  113. buf.WriteString(tail)
  114. }
  115. // Skip to the next byte if we're ignoring
  116. if ignoring {
  117. i++
  118. continue
  119. }
  120. // collects printable ASCII
  121. curWidth++
  122. fallthrough
  123. default:
  124. buf.WriteByte(b[i])
  125. i++
  126. }
  127. // Transition to the next state.
  128. pstate = state
  129. // Once we reach the given length, we start ignoring runes and write
  130. // the tail to the buffer.
  131. if curWidth > length && !ignoring {
  132. ignoring = true
  133. buf.WriteString(tail)
  134. }
  135. }
  136. return buf.String()
  137. }
  138. // TruncateLeft truncates a string from the left side by removing n characters,
  139. // adding a prefix to the beginning if the string is longer than n.
  140. // This function is aware of ANSI escape codes and will not break them, and
  141. // accounts for wide-characters (such as East-Asian characters and emojis).
  142. // This treats the text as a sequence of graphemes.
  143. func TruncateLeft(s string, n int, prefix string) string {
  144. return truncateLeft(GraphemeWidth, s, n, prefix)
  145. }
  146. // TruncateLeftWc truncates a string from the left side by removing n characters,
  147. // adding a prefix to the beginning if the string is longer than n.
  148. // This function is aware of ANSI escape codes and will not break them, and
  149. // accounts for wide-characters (such as East-Asian characters and emojis).
  150. // This treats the text as a sequence of wide characters and runes.
  151. func TruncateLeftWc(s string, n int, prefix string) string {
  152. return truncateLeft(WcWidth, s, n, prefix)
  153. }
  154. func truncateLeft(m Method, s string, n int, prefix string) string {
  155. if n <= 0 {
  156. return s
  157. }
  158. var cluster []byte
  159. var buf bytes.Buffer
  160. curWidth := 0
  161. ignoring := true
  162. pstate := parser.GroundState
  163. b := []byte(s)
  164. i := 0
  165. for i < len(b) {
  166. if !ignoring {
  167. buf.Write(b[i:])
  168. break
  169. }
  170. state, action := parser.Table.Transition(pstate, b[i])
  171. if state == parser.Utf8State {
  172. var width int
  173. cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1)
  174. if m == WcWidth {
  175. width = runewidth.StringWidth(string(cluster))
  176. }
  177. i += len(cluster)
  178. curWidth += width
  179. if curWidth > n && ignoring {
  180. ignoring = false
  181. buf.WriteString(prefix)
  182. }
  183. if ignoring {
  184. continue
  185. }
  186. if curWidth > n {
  187. buf.Write(cluster)
  188. }
  189. pstate = parser.GroundState
  190. continue
  191. }
  192. switch action {
  193. case parser.PrintAction:
  194. curWidth++
  195. if curWidth > n && ignoring {
  196. ignoring = false
  197. buf.WriteString(prefix)
  198. }
  199. if ignoring {
  200. i++
  201. continue
  202. }
  203. fallthrough
  204. default:
  205. buf.WriteByte(b[i])
  206. i++
  207. }
  208. pstate = state
  209. if curWidth > n && ignoring {
  210. ignoring = false
  211. buf.WriteString(prefix)
  212. }
  213. }
  214. return buf.String()
  215. }
  216. // ByteToGraphemeRange takes start and stop byte positions and converts them to
  217. // grapheme-aware char positions.
  218. // You can use this with [Truncate], [TruncateLeft], and [Cut].
  219. func ByteToGraphemeRange(str string, byteStart, byteStop int) (charStart, charStop int) {
  220. bytePos, charPos := 0, 0
  221. gr := uniseg.NewGraphemes(str)
  222. for byteStart > bytePos {
  223. if !gr.Next() {
  224. break
  225. }
  226. bytePos += len(gr.Str())
  227. charPos += max(1, gr.Width())
  228. }
  229. charStart = charPos
  230. for byteStop > bytePos {
  231. if !gr.Next() {
  232. break
  233. }
  234. bytePos += len(gr.Str())
  235. charPos += max(1, gr.Width())
  236. }
  237. charStop = charPos
  238. return
  239. }