| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524 |
- package ansi
- import (
- "unicode/utf8"
- "github.com/charmbracelet/x/ansi/parser"
- "github.com/mattn/go-runewidth"
- "github.com/rivo/uniseg"
- )
- // State represents the state of the ANSI escape sequence parser used by
- // [DecodeSequence].
- type State = byte
- // ANSI escape sequence states used by [DecodeSequence].
- const (
- NormalState State = iota
- PrefixState
- ParamsState
- IntermedState
- EscapeState
- StringState
- )
- // DecodeSequence decodes the first ANSI escape sequence or a printable
- // grapheme from the given data. It returns the sequence slice, the number of
- // bytes read, the cell width for each sequence, and the new state.
- //
- // The cell width will always be 0 for control and escape sequences, 1 for
- // ASCII printable characters, and the number of cells other Unicode characters
- // occupy. It uses the uniseg package to calculate the width of Unicode
- // graphemes and characters. This means it will always do grapheme clustering
- // (mode 2027).
- //
- // Passing a non-nil [*Parser] as the last argument will allow the decoder to
- // collect sequence parameters, data, and commands. The parser cmd will have
- // the packed command value that contains intermediate and prefix characters.
- // In the case of a OSC sequence, the cmd will be the OSC command number. Use
- // [Cmd] and [Param] types to unpack command intermediates and prefixes as well
- // as parameters.
- //
- // Zero [Cmd] means the CSI, DCS, or ESC sequence is invalid. Moreover, checking the
- // validity of other data sequences, OSC, DCS, etc, will require checking for
- // the returned sequence terminator bytes such as ST (ESC \\) and BEL).
- //
- // We store the command byte in [Cmd] in the most significant byte, the
- // prefix byte in the next byte, and the intermediate byte in the least
- // significant byte. This is done to avoid using a struct to store the command
- // and its intermediates and prefixes. The command byte is always the least
- // significant byte i.e. [Cmd & 0xff]. Use the [Cmd] type to unpack the
- // command, intermediate, and prefix bytes. Note that we only collect the last
- // prefix character and intermediate byte.
- //
- // The [p.Params] slice will contain the parameters of the sequence. Any
- // sub-parameter will have the [parser.HasMoreFlag] set. Use the [Param] type
- // to unpack the parameters.
- //
- // Example:
- //
- // var state byte // the initial state is always zero [NormalState]
- // p := NewParser(32, 1024) // create a new parser with a 32 params buffer and 1024 data buffer (optional)
- // input := []byte("\x1b[31mHello, World!\x1b[0m")
- // for len(input) > 0 {
- // seq, width, n, newState := DecodeSequence(input, state, p)
- // log.Printf("seq: %q, width: %d", seq, width)
- // state = newState
- // input = input[n:]
- // }
- //
- // This function treats the text as a sequence of grapheme clusters.
- func DecodeSequence[T string | []byte](b T, state byte, p *Parser) (seq T, width int, n int, newState byte) {
- return decodeSequence(GraphemeWidth, b, state, p)
- }
- // DecodeSequenceWc decodes the first ANSI escape sequence or a printable
- // grapheme from the given data. It returns the sequence slice, the number of
- // bytes read, the cell width for each sequence, and the new state.
- //
- // The cell width will always be 0 for control and escape sequences, 1 for
- // ASCII printable characters, and the number of cells other Unicode characters
- // occupy. It uses the uniseg package to calculate the width of Unicode
- // graphemes and characters. This means it will always do grapheme clustering
- // (mode 2027).
- //
- // Passing a non-nil [*Parser] as the last argument will allow the decoder to
- // collect sequence parameters, data, and commands. The parser cmd will have
- // the packed command value that contains intermediate and prefix characters.
- // In the case of a OSC sequence, the cmd will be the OSC command number. Use
- // [Cmd] and [Param] types to unpack command intermediates and prefixes as well
- // as parameters.
- //
- // Zero [Cmd] means the CSI, DCS, or ESC sequence is invalid. Moreover, checking the
- // validity of other data sequences, OSC, DCS, etc, will require checking for
- // the returned sequence terminator bytes such as ST (ESC \\) and BEL).
- //
- // We store the command byte in [Cmd] in the most significant byte, the
- // prefix byte in the next byte, and the intermediate byte in the least
- // significant byte. This is done to avoid using a struct to store the command
- // and its intermediates and prefixes. The command byte is always the least
- // significant byte i.e. [Cmd & 0xff]. Use the [Cmd] type to unpack the
- // command, intermediate, and prefix bytes. Note that we only collect the last
- // prefix character and intermediate byte.
- //
- // The [p.Params] slice will contain the parameters of the sequence. Any
- // sub-parameter will have the [parser.HasMoreFlag] set. Use the [Param] type
- // to unpack the parameters.
- //
- // Example:
- //
- // var state byte // the initial state is always zero [NormalState]
- // p := NewParser(32, 1024) // create a new parser with a 32 params buffer and 1024 data buffer (optional)
- // input := []byte("\x1b[31mHello, World!\x1b[0m")
- // for len(input) > 0 {
- // seq, width, n, newState := DecodeSequenceWc(input, state, p)
- // log.Printf("seq: %q, width: %d", seq, width)
- // state = newState
- // input = input[n:]
- // }
- //
- // This function treats the text as a sequence of wide characters and runes.
- func DecodeSequenceWc[T string | []byte](b T, state byte, p *Parser) (seq T, width int, n int, newState byte) {
- return decodeSequence(WcWidth, b, state, p)
- }
- func decodeSequence[T string | []byte](m Method, b T, state State, p *Parser) (seq T, width int, n int, newState byte) {
- for i := 0; i < len(b); i++ {
- c := b[i]
- switch state {
- case NormalState:
- switch c {
- case ESC:
- if p != nil {
- if len(p.params) > 0 {
- p.params[0] = parser.MissingParam
- }
- p.cmd = 0
- p.paramsLen = 0
- p.dataLen = 0
- }
- state = EscapeState
- continue
- case CSI, DCS:
- if p != nil {
- if len(p.params) > 0 {
- p.params[0] = parser.MissingParam
- }
- p.cmd = 0
- p.paramsLen = 0
- p.dataLen = 0
- }
- state = PrefixState
- continue
- case OSC, APC, SOS, PM:
- if p != nil {
- p.cmd = parser.MissingCommand
- p.dataLen = 0
- }
- state = StringState
- continue
- }
- if p != nil {
- p.dataLen = 0
- p.paramsLen = 0
- p.cmd = 0
- }
- if c > US && c < DEL {
- // ASCII printable characters
- return b[i : i+1], 1, 1, NormalState
- }
- if c <= US || c == DEL || c < 0xC0 {
- // C0 & C1 control characters & DEL
- return b[i : i+1], 0, 1, NormalState
- }
- if utf8.RuneStart(c) {
- seq, _, width, _ = FirstGraphemeCluster(b, -1)
- if m == WcWidth {
- width = runewidth.StringWidth(string(seq))
- }
- i += len(seq)
- return b[:i], width, i, NormalState
- }
- // Invalid UTF-8 sequence
- return b[:i], 0, i, NormalState
- case PrefixState:
- if c >= '<' && c <= '?' {
- if p != nil {
- // We only collect the last prefix character.
- p.cmd &^= 0xff << parser.PrefixShift
- p.cmd |= int(c) << parser.PrefixShift
- }
- break
- }
- state = ParamsState
- fallthrough
- case ParamsState:
- if c >= '0' && c <= '9' {
- if p != nil {
- if p.params[p.paramsLen] == parser.MissingParam {
- p.params[p.paramsLen] = 0
- }
- p.params[p.paramsLen] *= 10
- p.params[p.paramsLen] += int(c - '0')
- }
- break
- }
- if c == ':' {
- if p != nil {
- p.params[p.paramsLen] |= parser.HasMoreFlag
- }
- }
- if c == ';' || c == ':' {
- if p != nil {
- p.paramsLen++
- if p.paramsLen < len(p.params) {
- p.params[p.paramsLen] = parser.MissingParam
- }
- }
- break
- }
- state = IntermedState
- fallthrough
- case IntermedState:
- if c >= ' ' && c <= '/' {
- if p != nil {
- p.cmd &^= 0xff << parser.IntermedShift
- p.cmd |= int(c) << parser.IntermedShift
- }
- break
- }
- if p != nil {
- // Increment the last parameter
- if p.paramsLen > 0 && p.paramsLen < len(p.params)-1 ||
- p.paramsLen == 0 && len(p.params) > 0 && p.params[0] != parser.MissingParam {
- p.paramsLen++
- }
- }
- if c >= '@' && c <= '~' {
- if p != nil {
- p.cmd &^= 0xff
- p.cmd |= int(c)
- }
- if HasDcsPrefix(b) {
- // Continue to collect DCS data
- if p != nil {
- p.dataLen = 0
- }
- state = StringState
- continue
- }
- return b[:i+1], 0, i + 1, NormalState
- }
- // Invalid CSI/DCS sequence
- return b[:i], 0, i, NormalState
- case EscapeState:
- switch c {
- case '[', 'P':
- if p != nil {
- if len(p.params) > 0 {
- p.params[0] = parser.MissingParam
- }
- p.paramsLen = 0
- p.cmd = 0
- }
- state = PrefixState
- continue
- case ']', 'X', '^', '_':
- if p != nil {
- p.cmd = parser.MissingCommand
- p.dataLen = 0
- }
- state = StringState
- continue
- }
- if c >= ' ' && c <= '/' {
- if p != nil {
- p.cmd &^= 0xff << parser.IntermedShift
- p.cmd |= int(c) << parser.IntermedShift
- }
- continue
- } else if c >= '0' && c <= '~' {
- if p != nil {
- p.cmd &^= 0xff
- p.cmd |= int(c)
- }
- return b[:i+1], 0, i + 1, NormalState
- }
- // Invalid escape sequence
- return b[:i], 0, i, NormalState
- case StringState:
- switch c {
- case BEL:
- if HasOscPrefix(b) {
- parseOscCmd(p)
- return b[:i+1], 0, i + 1, NormalState
- }
- case CAN, SUB:
- if HasOscPrefix(b) {
- // Ensure we parse the OSC command number
- parseOscCmd(p)
- }
- // Cancel the sequence
- return b[:i], 0, i, NormalState
- case ST:
- if HasOscPrefix(b) {
- // Ensure we parse the OSC command number
- parseOscCmd(p)
- }
- return b[:i+1], 0, i + 1, NormalState
- case ESC:
- if HasStPrefix(b[i:]) {
- if HasOscPrefix(b) {
- // Ensure we parse the OSC command number
- parseOscCmd(p)
- }
- // End of string 7-bit (ST)
- return b[:i+2], 0, i + 2, NormalState
- }
- // Otherwise, cancel the sequence
- return b[:i], 0, i, NormalState
- }
- if p != nil && p.dataLen < len(p.data) {
- p.data[p.dataLen] = c
- p.dataLen++
- // Parse the OSC command number
- if c == ';' && HasOscPrefix(b) {
- parseOscCmd(p)
- }
- }
- }
- }
- return b, 0, len(b), state
- }
- func parseOscCmd(p *Parser) {
- if p == nil || p.cmd != parser.MissingCommand {
- return
- }
- for j := 0; j < p.dataLen; j++ {
- d := p.data[j]
- if d < '0' || d > '9' {
- break
- }
- if p.cmd == parser.MissingCommand {
- p.cmd = 0
- }
- p.cmd *= 10
- p.cmd += int(d - '0')
- }
- }
- // Equal returns true if the given byte slices are equal.
- func Equal[T string | []byte](a, b T) bool {
- return string(a) == string(b)
- }
- // HasPrefix returns true if the given byte slice has prefix.
- func HasPrefix[T string | []byte](b, prefix T) bool {
- return len(b) >= len(prefix) && Equal(b[0:len(prefix)], prefix)
- }
- // HasSuffix returns true if the given byte slice has suffix.
- func HasSuffix[T string | []byte](b, suffix T) bool {
- return len(b) >= len(suffix) && Equal(b[len(b)-len(suffix):], suffix)
- }
- // HasCsiPrefix returns true if the given byte slice has a CSI prefix.
- func HasCsiPrefix[T string | []byte](b T) bool {
- return (len(b) > 0 && b[0] == CSI) ||
- (len(b) > 1 && b[0] == ESC && b[1] == '[')
- }
- // HasOscPrefix returns true if the given byte slice has an OSC prefix.
- func HasOscPrefix[T string | []byte](b T) bool {
- return (len(b) > 0 && b[0] == OSC) ||
- (len(b) > 1 && b[0] == ESC && b[1] == ']')
- }
- // HasApcPrefix returns true if the given byte slice has an APC prefix.
- func HasApcPrefix[T string | []byte](b T) bool {
- return (len(b) > 0 && b[0] == APC) ||
- (len(b) > 1 && b[0] == ESC && b[1] == '_')
- }
- // HasDcsPrefix returns true if the given byte slice has a DCS prefix.
- func HasDcsPrefix[T string | []byte](b T) bool {
- return (len(b) > 0 && b[0] == DCS) ||
- (len(b) > 1 && b[0] == ESC && b[1] == 'P')
- }
- // HasSosPrefix returns true if the given byte slice has a SOS prefix.
- func HasSosPrefix[T string | []byte](b T) bool {
- return (len(b) > 0 && b[0] == SOS) ||
- (len(b) > 1 && b[0] == ESC && b[1] == 'X')
- }
- // HasPmPrefix returns true if the given byte slice has a PM prefix.
- func HasPmPrefix[T string | []byte](b T) bool {
- return (len(b) > 0 && b[0] == PM) ||
- (len(b) > 1 && b[0] == ESC && b[1] == '^')
- }
- // HasStPrefix returns true if the given byte slice has a ST prefix.
- func HasStPrefix[T string | []byte](b T) bool {
- return (len(b) > 0 && b[0] == ST) ||
- (len(b) > 1 && b[0] == ESC && b[1] == '\\')
- }
- // HasEscPrefix returns true if the given byte slice has an ESC prefix.
- func HasEscPrefix[T string | []byte](b T) bool {
- return len(b) > 0 && b[0] == ESC
- }
- // FirstGraphemeCluster returns the first grapheme cluster in the given string or byte slice.
- // This is a syntactic sugar function that wraps
- // uniseg.FirstGraphemeClusterInString and uniseg.FirstGraphemeCluster.
- func FirstGraphemeCluster[T string | []byte](b T, state int) (T, T, int, int) {
- switch b := any(b).(type) {
- case string:
- cluster, rest, width, newState := uniseg.FirstGraphemeClusterInString(b, state)
- return T(cluster), T(rest), width, newState
- case []byte:
- cluster, rest, width, newState := uniseg.FirstGraphemeCluster(b, state)
- return T(cluster), T(rest), width, newState
- }
- panic("unreachable")
- }
- // Cmd represents a sequence command. This is used to pack/unpack a sequence
- // command with its intermediate and prefix characters. Those are commonly
- // found in CSI and DCS sequences.
- type Cmd int
- // Prefix returns the unpacked prefix byte of the CSI sequence.
- // This is always gonna be one of the following '<' '=' '>' '?' and in the
- // range of 0x3C-0x3F.
- // Zero is returned if the sequence does not have a prefix.
- func (c Cmd) Prefix() byte {
- return byte(parser.Prefix(int(c)))
- }
- // Intermediate returns the unpacked intermediate byte of the CSI sequence.
- // An intermediate byte is in the range of 0x20-0x2F. This includes these
- // characters from ' ', '!', '"', '#', '$', '%', '&', ”', '(', ')', '*', '+',
- // ',', '-', '.', '/'.
- // Zero is returned if the sequence does not have an intermediate byte.
- func (c Cmd) Intermediate() byte {
- return byte(parser.Intermediate(int(c)))
- }
- // Final returns the unpacked command byte of the CSI sequence.
- func (c Cmd) Final() byte {
- return byte(parser.Command(int(c)))
- }
- // Command packs a command with the given prefix, intermediate, and final. A
- // zero byte means the sequence does not have a prefix or intermediate.
- //
- // Prefixes are in the range of 0x3C-0x3F that is one of `<=>?`.
- //
- // Intermediates are in the range of 0x20-0x2F that is anything in
- // `!"#$%&'()*+,-./`.
- //
- // Final bytes are in the range of 0x40-0x7E that is anything in the range
- // `@A–Z[\]^_`a–z{|}~`.
- func Command(prefix, inter, final byte) (c int) {
- c = int(final)
- c |= int(prefix) << parser.PrefixShift
- c |= int(inter) << parser.IntermedShift
- return
- }
- // Param represents a sequence parameter. Sequence parameters with
- // sub-parameters are packed with the HasMoreFlag set. This is used to unpack
- // the parameters from a CSI and DCS sequences.
- type Param int
- // Param returns the unpacked parameter at the given index.
- // It returns the default value if the parameter is missing.
- func (s Param) Param(def int) int {
- p := int(s) & parser.ParamMask
- if p == parser.MissingParam {
- return def
- }
- return p
- }
- // HasMore unpacks the HasMoreFlag from the parameter.
- func (s Param) HasMore() bool {
- return s&parser.HasMoreFlag != 0
- }
- // Parameter packs an escape code parameter with the given parameter and
- // whether this parameter has following sub-parameters.
- func Parameter(p int, hasMore bool) (s int) {
- s = p & parser.ParamMask
- if hasMore {
- s |= parser.HasMoreFlag
- }
- return
- }
|