| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406 |
- package ansi
- import (
- "bytes"
- "unicode"
- "unicode/utf8"
- "github.com/charmbracelet/x/ansi/parser"
- "github.com/rivo/uniseg"
- )
- // nbsp is the code point of the non-breaking space (U+00A0); Wordwrap and Wrap compare multi-byte spaces against it so that it is kept inside the word instead of being treated as breakable whitespace.
- const nbsp = 0xA0
- // Hardwrap wraps a string or a block of text to a given line length, breaking
- // word boundaries. This will preserve ANSI escape codes and will account for
- // wide-characters in the string.
- // When preserveSpace is true, spaces at the beginning of a line will be
- // preserved.
- func Hardwrap(s string, limit int, preserveSpace bool) string {
- if limit < 1 {
- return s
- }
- var (
- cluster []byte
- buf bytes.Buffer
- curWidth int
- forceNewline bool
- gstate = -1
- pstate = parser.GroundState // initial state
- b = []byte(s)
- )
- addNewline := func() {
- buf.WriteByte('\n')
- curWidth = 0
- }
- i := 0
- for i < len(b) {
- state, action := parser.Table.Transition(pstate, b[i])
- switch action {
- case parser.PrintAction:
- if utf8ByteLen(b[i]) > 1 {
- var width int
- cluster, _, width, gstate = uniseg.FirstGraphemeCluster(b[i:], gstate)
- i += len(cluster)
- if curWidth+width > limit {
- addNewline()
- }
- if !preserveSpace && curWidth == 0 && len(cluster) <= 4 {
- // Skip spaces at the beginning of a line
- if r, _ := utf8.DecodeRune(cluster); r != utf8.RuneError && unicode.IsSpace(r) {
- pstate = parser.GroundState
- continue
- }
- }
- buf.Write(cluster)
- curWidth += width
- gstate = -1 // reset grapheme state otherwise, width calculation might be off
- pstate = parser.GroundState
- continue
- }
- fallthrough
- case parser.ExecuteAction:
- if b[i] == '\n' {
- addNewline()
- forceNewline = false
- break
- }
- if curWidth+1 > limit {
- addNewline()
- forceNewline = true
- }
- // Skip spaces at the beginning of a line
- if curWidth == 0 {
- if !preserveSpace && forceNewline && unicode.IsSpace(rune(b[i])) {
- break
- }
- forceNewline = false
- }
- buf.WriteByte(b[i])
- curWidth++
- default:
- buf.WriteByte(b[i])
- }
- // We manage the UTF8 state separately manually above.
- if pstate != parser.Utf8State {
- pstate = state
- }
- i++
- }
- return buf.String()
- }
- // Wordwrap wraps a string or a block of text to a given line length, not
- // breaking word boundaries. This will preserve ANSI escape codes and will
- // account for wide-characters in the string.
- // The breakpoints string is a list of characters that are considered
- // breakpoints for word wrapping. A hyphen (-) is always considered a
- // breakpoint.
- //
- // Note: breakpoints must be a string of 1-cell wide rune characters.
- func Wordwrap(s string, limit int, breakpoints string) string {
- if limit < 1 {
- return s
- }
- var (
- cluster []byte
- buf bytes.Buffer
- word bytes.Buffer
- space bytes.Buffer
- curWidth int
- wordLen int
- gstate = -1
- pstate = parser.GroundState // initial state
- b = []byte(s)
- )
- addSpace := func() {
- curWidth += space.Len()
- buf.Write(space.Bytes())
- space.Reset()
- }
- addWord := func() {
- if word.Len() == 0 {
- return
- }
- addSpace()
- curWidth += wordLen
- buf.Write(word.Bytes())
- word.Reset()
- wordLen = 0
- }
- addNewline := func() {
- buf.WriteByte('\n')
- curWidth = 0
- space.Reset()
- }
- i := 0
- for i < len(b) {
- state, action := parser.Table.Transition(pstate, b[i])
- switch action {
- case parser.PrintAction:
- if utf8ByteLen(b[i]) > 1 {
- var width int
- cluster, _, width, gstate = uniseg.FirstGraphemeCluster(b[i:], gstate)
- i += len(cluster)
- r, _ := utf8.DecodeRune(cluster)
- if r != utf8.RuneError && unicode.IsSpace(r) && r != nbsp {
- addWord()
- space.WriteRune(r)
- } else if bytes.ContainsAny(cluster, breakpoints) {
- addSpace()
- addWord()
- buf.Write(cluster)
- curWidth++
- } else {
- word.Write(cluster)
- wordLen += width
- if curWidth+space.Len()+wordLen > limit &&
- wordLen < limit {
- addNewline()
- }
- }
- pstate = parser.GroundState
- continue
- }
- fallthrough
- case parser.ExecuteAction:
- r := rune(b[i])
- switch {
- case r == '\n':
- if wordLen == 0 {
- if curWidth+space.Len() > limit {
- curWidth = 0
- } else {
- buf.Write(space.Bytes())
- }
- space.Reset()
- }
- addWord()
- addNewline()
- case unicode.IsSpace(r):
- addWord()
- space.WriteByte(b[i])
- case r == '-':
- fallthrough
- case runeContainsAny(r, breakpoints):
- addSpace()
- addWord()
- buf.WriteByte(b[i])
- curWidth++
- default:
- word.WriteByte(b[i])
- wordLen++
- if curWidth+space.Len()+wordLen > limit &&
- wordLen < limit {
- addNewline()
- }
- }
- default:
- word.WriteByte(b[i])
- }
- // We manage the UTF8 state separately manually above.
- if pstate != parser.Utf8State {
- pstate = state
- }
- i++
- }
- addWord()
- return buf.String()
- }
- // Wrap wraps a string or a block of text to a given line length, breaking word
- // boundaries if necessary. This will preserve ANSI escape codes and will
- // account for wide-characters in the string. The breakpoints string is a list
- // of characters that are considered breakpoints for word wrapping. A hyphen
- // (-) is always considered a breakpoint.
- //
- // Note: breakpoints must be a string of 1-cell wide rune characters.
- func Wrap(s string, limit int, breakpoints string) string {
- if limit < 1 {
- return s
- }
- var (
- cluster []byte
- buf bytes.Buffer
- word bytes.Buffer
- space bytes.Buffer
- curWidth int // written width of the line
- wordLen int // word buffer len without ANSI escape codes
- gstate = -1
- pstate = parser.GroundState // initial state
- b = []byte(s)
- )
- addSpace := func() {
- curWidth += space.Len()
- buf.Write(space.Bytes())
- space.Reset()
- }
- addWord := func() {
- if word.Len() == 0 {
- return
- }
- addSpace()
- curWidth += wordLen
- buf.Write(word.Bytes())
- word.Reset()
- wordLen = 0
- }
- addNewline := func() {
- buf.WriteByte('\n')
- curWidth = 0
- space.Reset()
- }
- i := 0
- for i < len(b) {
- state, action := parser.Table.Transition(pstate, b[i])
- switch action {
- case parser.PrintAction:
- if utf8ByteLen(b[i]) > 1 {
- var width int
- cluster, _, width, gstate = uniseg.FirstGraphemeCluster(b[i:], gstate)
- i += len(cluster)
- r, _ := utf8.DecodeRune(cluster)
- switch {
- case r != utf8.RuneError && unicode.IsSpace(r) && r != nbsp: // nbsp is a non-breaking space
- addWord()
- space.WriteRune(r)
- case bytes.ContainsAny(cluster, breakpoints):
- addSpace()
- if curWidth+wordLen+width > limit {
- word.Write(cluster)
- wordLen += width
- } else {
- addWord()
- buf.Write(cluster)
- curWidth += width
- }
- default:
- if wordLen+width > limit {
- // Hardwrap the word if it's too long
- addWord()
- }
- word.Write(cluster)
- wordLen += width
- if curWidth+wordLen+space.Len() > limit {
- addNewline()
- }
- }
- pstate = parser.GroundState
- continue
- }
- fallthrough
- case parser.ExecuteAction:
- switch r := rune(b[i]); {
- case r == '\n':
- if wordLen == 0 {
- if curWidth+space.Len() > limit {
- curWidth = 0
- } else {
- // preserve whitespaces
- buf.Write(space.Bytes())
- }
- space.Reset()
- }
- addWord()
- addNewline()
- case unicode.IsSpace(r):
- addWord()
- space.WriteRune(r)
- case r == '-':
- fallthrough
- case runeContainsAny(r, breakpoints):
- addSpace()
- if curWidth+wordLen >= limit {
- // We can't fit the breakpoint in the current line, treat
- // it as part of the word.
- word.WriteRune(r)
- wordLen++
- } else {
- addWord()
- buf.WriteRune(r)
- curWidth++
- }
- default:
- word.WriteRune(r)
- wordLen++
- if wordLen == limit {
- // Hardwrap the word if it's too long
- addWord()
- }
- if curWidth+wordLen+space.Len() > limit {
- addNewline()
- }
- }
- default:
- word.WriteByte(b[i])
- }
- // We manage the UTF8 state separately manually above.
- if pstate != parser.Utf8State {
- pstate = state
- }
- i++
- }
- if word.Len() != 0 {
- // Preserve ANSI wrapped spaces at the end of string
- if curWidth+space.Len() > limit {
- buf.WriteByte('\n')
- }
- addSpace()
- }
- buf.Write(word.Bytes())
- return buf.String()
- }
// runeContainsAny reports whether the rune r occurs among the runes of s.
// An empty s never matches.
func runeContainsAny(r rune, s string) bool {
	matched := false
	for _, candidate := range s {
		if candidate != r {
			continue
		}
		matched = true
		break
	}
	return matched
}
|