wrap.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467
  1. package ansi
  2. import (
  3. "bytes"
  4. "unicode"
  5. "unicode/utf8"
  6. "github.com/charmbracelet/x/ansi/parser"
  7. "github.com/mattn/go-runewidth"
  8. "github.com/rivo/uniseg"
  9. )
// nbsp is the code point of the non-breaking space (U+00A0). wordwrap and
// wrap compare decoded runes against it so that a non-breaking space is not
// treated as breakable whitespace.
const nbsp = 0xA0
// Hardwrap wraps a string or a block of text to a given line length, breaking
// word boundaries: a newline is inserted wherever the next character would
// push the line past limit cells, even in the middle of a word. ANSI escape
// codes are preserved and wide characters are accounted for.
//
// When preserveSpace is true, spaces at the beginning of a line will be
// preserved; otherwise whitespace that would start a wrapped line is dropped.
//
// If limit is less than 1, s is returned unchanged.
//
// This treats the text as a sequence of graphemes (it delegates to hardwrap
// with the GraphemeWidth method).
func Hardwrap(s string, limit int, preserveSpace bool) string {
	return hardwrap(GraphemeWidth, s, limit, preserveSpace)
}
// HardwrapWc wraps a string or a block of text to a given line length,
// breaking word boundaries: a newline is inserted wherever the next character
// would push the line past limit cells, even in the middle of a word. ANSI
// escape codes are preserved and wide characters are accounted for.
//
// When preserveSpace is true, spaces at the beginning of a line will be
// preserved; otherwise whitespace that would start a wrapped line is dropped.
//
// If limit is less than 1, s is returned unchanged.
//
// This treats the text as a sequence of wide characters and runes (it
// delegates to hardwrap with the WcWidth method, under which multi-byte
// clusters are measured with runewidth.StringWidth).
func HardwrapWc(s string, limit int, preserveSpace bool) string {
	return hardwrap(WcWidth, s, limit, preserveSpace)
}
// hardwrap is the shared implementation behind Hardwrap and HardwrapWc.
//
// It walks s one byte (or one multi-byte grapheme cluster) at a time, using
// the ANSI parser transition table to classify each byte, and writes a '\n'
// whenever the next printable unit would push the current line past limit
// cells. Bytes whose parser action is neither print nor execute (presumably
// the bytes of escape sequences) are copied through without affecting the
// width.
//
// m selects how multi-byte clusters are measured: with WcWidth the width comes
// from runewidth.StringWidth, otherwise the width reported by
// uniseg.FirstGraphemeCluster is used.
//
// If limit < 1, s is returned unchanged.
func hardwrap(m Method, s string, limit int, preserveSpace bool) string {
	if limit < 1 {
		return s
	}

	var (
		cluster      []byte
		buf          bytes.Buffer
		curWidth     int                  // cells written on the current line
		forceNewline bool                 // set when the single-byte path inserted the last newline itself
		pstate       = parser.GroundState // initial state
		b            = []byte(s)
	)

	// addNewline ends the current output line and resets its width.
	addNewline := func() {
		buf.WriteByte('\n')
		curWidth = 0
	}

	i := 0
	for i < len(b) {
		state, action := parser.Table.Transition(pstate, b[i])
		if state == parser.Utf8State {
			// Multi-byte input: consume a whole grapheme cluster at once.
			var width int
			cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1)
			if m == WcWidth {
				width = runewidth.StringWidth(string(cluster))
			}
			i += len(cluster)

			if curWidth+width > limit {
				addNewline()
			}
			// len(cluster) <= 4 restricts the check to clusters that can hold
			// at most one UTF-8 encoded rune.
			// NOTE(review): unlike the single-byte path below, this does not
			// consult forceNewline, so a multi-byte space is also dropped at
			// the very start of the input and right after an explicit '\n'
			// — confirm this is intended.
			if !preserveSpace && curWidth == 0 && len(cluster) <= 4 {
				// Skip spaces at the beginning of a line
				if r, _ := utf8.DecodeRune(cluster); r != utf8.RuneError && unicode.IsSpace(r) {
					pstate = parser.GroundState
					continue
				}
			}

			buf.Write(cluster)
			curWidth += width
			pstate = parser.GroundState
			continue
		}

		switch action {
		case parser.PrintAction, parser.ExecuteAction:
			if b[i] == '\n' {
				// An explicit newline from the input; whitespace following it
				// is not stripped by the single-byte path.
				addNewline()
				forceNewline = false
				break
			}

			// NOTE(review): this check also runs for ExecuteAction bytes,
			// which are not added to curWidth below, so a control byte
			// arriving on a full line triggers a wrap — confirm this is
			// intended.
			if curWidth+1 > limit {
				addNewline()
				forceNewline = true
			}

			// Skip spaces at the beginning of a line
			if curWidth == 0 {
				if !preserveSpace && forceNewline && unicode.IsSpace(rune(b[i])) {
					break
				}
				forceNewline = false
			}

			buf.WriteByte(b[i])
			// Only printable bytes occupy a cell.
			if action == parser.PrintAction {
				curWidth++
			}
		default:
			// Any other action: copy the byte through without counting it.
			buf.WriteByte(b[i])
		}

		// We manage the UTF8 state separately manually above.
		if pstate != parser.Utf8State {
			pstate = state
		}
		i++
	}

	return buf.String()
}
// Wordwrap wraps a string or a block of text to a given line length, not
// breaking word boundaries. This will preserve ANSI escape codes and will
// account for wide-characters in the string. Words are never split, so a word
// wider than limit may produce a line longer than limit.
//
// The breakpoints string is a list of characters that are considered
// breakpoints for word wrapping. A hyphen (-) is always considered a
// breakpoint.
//
// If limit is less than 1, s is returned unchanged.
//
// Note: breakpoints must be a string of 1-cell wide rune characters.
//
// This treats the text as a sequence of graphemes (it delegates to wordwrap
// with the GraphemeWidth method).
func Wordwrap(s string, limit int, breakpoints string) string {
	return wordwrap(GraphemeWidth, s, limit, breakpoints)
}
// WordwrapWc wraps a string or a block of text to a given line length, not
// breaking word boundaries. This will preserve ANSI escape codes and will
// account for wide-characters in the string. Words are never split, so a word
// wider than limit may produce a line longer than limit.
//
// The breakpoints string is a list of characters that are considered
// breakpoints for word wrapping. A hyphen (-) is always considered a
// breakpoint.
//
// If limit is less than 1, s is returned unchanged.
//
// Note: breakpoints must be a string of 1-cell wide rune characters.
//
// This treats the text as a sequence of wide characters and runes (it
// delegates to wordwrap with the WcWidth method, under which multi-byte
// clusters are measured with runewidth.StringWidth).
func WordwrapWc(s string, limit int, breakpoints string) string {
	return wordwrap(WcWidth, s, limit, breakpoints)
}
  130. func wordwrap(m Method, s string, limit int, breakpoints string) string {
  131. if limit < 1 {
  132. return s
  133. }
  134. var (
  135. cluster []byte
  136. buf bytes.Buffer
  137. word bytes.Buffer
  138. space bytes.Buffer
  139. curWidth int
  140. wordLen int
  141. pstate = parser.GroundState // initial state
  142. b = []byte(s)
  143. )
  144. addSpace := func() {
  145. curWidth += space.Len()
  146. buf.Write(space.Bytes())
  147. space.Reset()
  148. }
  149. addWord := func() {
  150. if word.Len() == 0 {
  151. return
  152. }
  153. addSpace()
  154. curWidth += wordLen
  155. buf.Write(word.Bytes())
  156. word.Reset()
  157. wordLen = 0
  158. }
  159. addNewline := func() {
  160. buf.WriteByte('\n')
  161. curWidth = 0
  162. space.Reset()
  163. }
  164. i := 0
  165. for i < len(b) {
  166. state, action := parser.Table.Transition(pstate, b[i])
  167. if state == parser.Utf8State {
  168. var width int
  169. cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1)
  170. if m == WcWidth {
  171. width = runewidth.StringWidth(string(cluster))
  172. }
  173. i += len(cluster)
  174. r, _ := utf8.DecodeRune(cluster)
  175. if r != utf8.RuneError && unicode.IsSpace(r) && r != nbsp {
  176. addWord()
  177. space.WriteRune(r)
  178. } else if bytes.ContainsAny(cluster, breakpoints) {
  179. addSpace()
  180. addWord()
  181. buf.Write(cluster)
  182. curWidth++
  183. } else {
  184. word.Write(cluster)
  185. wordLen += width
  186. if curWidth+space.Len()+wordLen > limit &&
  187. wordLen < limit {
  188. addNewline()
  189. }
  190. }
  191. pstate = parser.GroundState
  192. continue
  193. }
  194. switch action {
  195. case parser.PrintAction, parser.ExecuteAction:
  196. r := rune(b[i])
  197. switch {
  198. case r == '\n':
  199. if wordLen == 0 {
  200. if curWidth+space.Len() > limit {
  201. curWidth = 0
  202. } else {
  203. buf.Write(space.Bytes())
  204. }
  205. space.Reset()
  206. }
  207. addWord()
  208. addNewline()
  209. case unicode.IsSpace(r):
  210. addWord()
  211. space.WriteByte(b[i])
  212. case r == '-':
  213. fallthrough
  214. case runeContainsAny(r, breakpoints):
  215. addSpace()
  216. addWord()
  217. buf.WriteByte(b[i])
  218. curWidth++
  219. default:
  220. word.WriteByte(b[i])
  221. wordLen++
  222. if curWidth+space.Len()+wordLen > limit &&
  223. wordLen < limit {
  224. addNewline()
  225. }
  226. }
  227. default:
  228. word.WriteByte(b[i])
  229. }
  230. // We manage the UTF8 state separately manually above.
  231. if pstate != parser.Utf8State {
  232. pstate = state
  233. }
  234. i++
  235. }
  236. addWord()
  237. return buf.String()
  238. }
// Wrap wraps a string or a block of text to a given line length, breaking word
// boundaries if necessary: lines are broken at whitespace and breakpoints
// where possible, and a word that is too wide for a line is split. This will
// preserve ANSI escape codes and will account for wide-characters in the
// string. The breakpoints string is a list of characters that are considered
// breakpoints for word wrapping. A hyphen (-) is always considered a
// breakpoint.
//
// If limit is less than 1, s is returned unchanged.
//
// Note: breakpoints must be a string of 1-cell wide rune characters.
//
// This treats the text as a sequence of graphemes (it delegates to wrap with
// the GraphemeWidth method).
func Wrap(s string, limit int, breakpoints string) string {
	return wrap(GraphemeWidth, s, limit, breakpoints)
}
// WrapWc wraps a string or a block of text to a given line length, breaking
// word boundaries if necessary: lines are broken at whitespace and breakpoints
// where possible, and a word that is too wide for a line is split. This will
// preserve ANSI escape codes and will account for wide-characters in the
// string. The breakpoints string is a list of characters that are considered
// breakpoints for word wrapping. A hyphen (-) is always considered a
// breakpoint.
//
// If limit is less than 1, s is returned unchanged.
//
// Note: breakpoints must be a string of 1-cell wide rune characters.
//
// This treats the text as a sequence of wide characters and runes (it
// delegates to wrap with the WcWidth method, under which multi-byte clusters
// are measured with runewidth.StringWidth).
func WrapWc(s string, limit int, breakpoints string) string {
	return wrap(WcWidth, s, limit, breakpoints)
}
  263. func wrap(m Method, s string, limit int, breakpoints string) string {
  264. if limit < 1 {
  265. return s
  266. }
  267. var (
  268. cluster []byte
  269. buf bytes.Buffer
  270. word bytes.Buffer
  271. space bytes.Buffer
  272. curWidth int // written width of the line
  273. wordLen int // word buffer len without ANSI escape codes
  274. pstate = parser.GroundState // initial state
  275. b = []byte(s)
  276. )
  277. addSpace := func() {
  278. curWidth += space.Len()
  279. buf.Write(space.Bytes())
  280. space.Reset()
  281. }
  282. addWord := func() {
  283. if word.Len() == 0 {
  284. return
  285. }
  286. addSpace()
  287. curWidth += wordLen
  288. buf.Write(word.Bytes())
  289. word.Reset()
  290. wordLen = 0
  291. }
  292. addNewline := func() {
  293. buf.WriteByte('\n')
  294. curWidth = 0
  295. space.Reset()
  296. }
  297. i := 0
  298. for i < len(b) {
  299. state, action := parser.Table.Transition(pstate, b[i])
  300. if state == parser.Utf8State {
  301. var width int
  302. cluster, _, width, _ = uniseg.FirstGraphemeCluster(b[i:], -1)
  303. if m == WcWidth {
  304. width = runewidth.StringWidth(string(cluster))
  305. }
  306. i += len(cluster)
  307. r, _ := utf8.DecodeRune(cluster)
  308. switch {
  309. case r != utf8.RuneError && unicode.IsSpace(r) && r != nbsp: // nbsp is a non-breaking space
  310. addWord()
  311. space.WriteRune(r)
  312. case bytes.ContainsAny(cluster, breakpoints):
  313. addSpace()
  314. if curWidth+wordLen+width > limit {
  315. word.Write(cluster)
  316. wordLen += width
  317. } else {
  318. addWord()
  319. buf.Write(cluster)
  320. curWidth += width
  321. }
  322. default:
  323. if wordLen+width > limit {
  324. // Hardwrap the word if it's too long
  325. addWord()
  326. }
  327. word.Write(cluster)
  328. wordLen += width
  329. if curWidth+wordLen+space.Len() > limit {
  330. addNewline()
  331. }
  332. }
  333. pstate = parser.GroundState
  334. continue
  335. }
  336. switch action {
  337. case parser.PrintAction, parser.ExecuteAction:
  338. switch r := rune(b[i]); {
  339. case r == '\n':
  340. if wordLen == 0 {
  341. if curWidth+space.Len() > limit {
  342. curWidth = 0
  343. } else {
  344. // preserve whitespaces
  345. buf.Write(space.Bytes())
  346. }
  347. space.Reset()
  348. }
  349. addWord()
  350. addNewline()
  351. case unicode.IsSpace(r):
  352. addWord()
  353. space.WriteRune(r)
  354. case r == '-':
  355. fallthrough
  356. case runeContainsAny(r, breakpoints):
  357. addSpace()
  358. if curWidth+wordLen >= limit {
  359. // We can't fit the breakpoint in the current line, treat
  360. // it as part of the word.
  361. word.WriteRune(r)
  362. wordLen++
  363. } else {
  364. addWord()
  365. buf.WriteRune(r)
  366. curWidth++
  367. }
  368. default:
  369. if curWidth == limit {
  370. addNewline()
  371. }
  372. word.WriteRune(r)
  373. wordLen++
  374. if wordLen == limit {
  375. // Hardwrap the word if it's too long
  376. addWord()
  377. }
  378. if curWidth+wordLen+space.Len() > limit {
  379. addNewline()
  380. }
  381. }
  382. default:
  383. word.WriteByte(b[i])
  384. }
  385. // We manage the UTF8 state separately manually above.
  386. if pstate != parser.Utf8State {
  387. pstate = state
  388. }
  389. i++
  390. }
  391. if wordLen == 0 {
  392. if curWidth+space.Len() > limit {
  393. curWidth = 0
  394. } else {
  395. // preserve whitespaces
  396. buf.Write(space.Bytes())
  397. }
  398. space.Reset()
  399. }
  400. addWord()
  401. return buf.String()
  402. }
  403. func runeContainsAny(r rune, s string) bool {
  404. for _, c := range s {
  405. if c == r {
  406. return true
  407. }
  408. }
  409. return false
  410. }