| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417 |
- package ansi
- import (
- "unicode/utf8"
- "unsafe"
- "github.com/charmbracelet/x/ansi/parser"
- )
- // Parser represents a DEC ANSI compatible sequence parser.
- //
- // It uses a state machine to parse ANSI escape sequences and control
- // characters. The parser is designed to be used with a terminal emulator or
- // similar application that needs to parse ANSI escape sequences and control
- // characters.
- // See package [parser] for more information.
- //
- //go:generate go run ./gen.go
- type Parser struct {
- handler Handler
- // params contains the raw parameters of the sequence.
- // These parameters used when constructing CSI and DCS sequences.
- params []int
- // data contains the raw data of the sequence.
- // These data used when constructing OSC, DCS, SOS, PM, and APC sequences.
- data []byte
- // dataLen keeps track of the length of the data buffer.
- // If dataLen is -1, the data buffer is unlimited and will grow as needed.
- // Otherwise, dataLen is limited by the size of the data buffer.
- dataLen int
- // paramsLen keeps track of the number of parameters.
- // This is limited by the size of the params buffer.
- //
- // This is also used when collecting UTF-8 runes to keep track of the
- // number of rune bytes collected.
- paramsLen int
- // cmd contains the raw command along with the private prefix and
- // intermediate bytes of the sequence.
- // The first lower byte contains the command byte, the next byte contains
- // the private prefix, and the next byte contains the intermediate byte.
- //
- // This is also used when collecting UTF-8 runes treating it as a slice of
- // 4 bytes.
- cmd int
- // state is the current state of the parser.
- state byte
- }
- // NewParser returns a new parser with the default settings.
- // The [Parser] uses a default size of 32 for the parameters and 64KB for the
- // data buffer. Use [Parser.SetParamsSize] and [Parser.SetDataSize] to set the
- // size of the parameters and data buffer respectively.
- func NewParser() *Parser {
- p := new(Parser)
- p.SetParamsSize(parser.MaxParamsSize)
- p.SetDataSize(1024 * 64) // 64KB data buffer
- return p
- }
- // SetParamsSize sets the size of the parameters buffer.
- // This is used when constructing CSI and DCS sequences.
- func (p *Parser) SetParamsSize(size int) {
- p.params = make([]int, size)
- }
- // SetDataSize sets the size of the data buffer.
- // This is used when constructing OSC, DCS, SOS, PM, and APC sequences.
- // If size is less than or equal to 0, the data buffer is unlimited and will
- // grow as needed.
- func (p *Parser) SetDataSize(size int) {
- if size <= 0 {
- size = 0
- p.dataLen = -1
- }
- p.data = make([]byte, size)
- }
- // Params returns the list of parsed packed parameters.
- func (p *Parser) Params() Params {
- return unsafe.Slice((*Param)(unsafe.Pointer(&p.params[0])), p.paramsLen)
- }
- // Param returns the parameter at the given index and falls back to the default
- // value if the parameter is missing. If the index is out of bounds, it returns
- // the default value and false.
- func (p *Parser) Param(i, def int) (int, bool) {
- if i < 0 || i >= p.paramsLen {
- return def, false
- }
- return Param(p.params[i]).Param(def), true
- }
- // Command returns the packed command of the last dispatched sequence. Use
- // [Cmd] to unpack the command.
- func (p *Parser) Command() int {
- return p.cmd
- }
- // Rune returns the last dispatched sequence as a rune.
- func (p *Parser) Rune() rune {
- rw := utf8ByteLen(byte(p.cmd & 0xff))
- if rw == -1 {
- return utf8.RuneError
- }
- r, _ := utf8.DecodeRune((*[utf8.UTFMax]byte)(unsafe.Pointer(&p.cmd))[:rw])
- return r
- }
- // Control returns the last dispatched sequence as a control code.
- func (p *Parser) Control() byte {
- return byte(p.cmd & 0xff)
- }
- // Data returns the raw data of the last dispatched sequence.
- func (p *Parser) Data() []byte {
- return p.data[:p.dataLen]
- }
- // Reset resets the parser to its initial state.
- func (p *Parser) Reset() {
- p.clear()
- p.state = parser.GroundState
- }
- // clear clears the parser parameters and command.
- func (p *Parser) clear() {
- if len(p.params) > 0 {
- p.params[0] = parser.MissingParam
- }
- p.paramsLen = 0
- p.cmd = 0
- }
- // State returns the current state of the parser.
- func (p *Parser) State() parser.State {
- return p.state
- }
- // StateName returns the name of the current state.
- func (p *Parser) StateName() string {
- return parser.StateNames[p.state]
- }
- // Parse parses the given dispatcher and byte buffer.
- // Deprecated: Loop over the buffer and call [Parser.Advance] instead.
- func (p *Parser) Parse(b []byte) {
- for i := 0; i < len(b); i++ {
- p.Advance(b[i])
- }
- }
- // Advance advances the parser using the given byte. It returns the action
- // performed by the parser.
- func (p *Parser) Advance(b byte) parser.Action {
- switch p.state {
- case parser.Utf8State:
- // We handle UTF-8 here.
- return p.advanceUtf8(b)
- default:
- return p.advance(b)
- }
- }
- func (p *Parser) collectRune(b byte) {
- if p.paramsLen >= utf8.UTFMax {
- return
- }
- shift := p.paramsLen * 8
- p.cmd &^= 0xff << shift
- p.cmd |= int(b) << shift
- p.paramsLen++
- }
- func (p *Parser) advanceUtf8(b byte) parser.Action {
- // Collect UTF-8 rune bytes.
- p.collectRune(b)
- rw := utf8ByteLen(byte(p.cmd & 0xff))
- if rw == -1 {
- // We panic here because the first byte comes from the state machine,
- // if this panics, it means there is a bug in the state machine!
- panic("invalid rune") // unreachable
- }
- if p.paramsLen < rw {
- return parser.CollectAction
- }
- // We have enough bytes to decode the rune using unsafe
- if p.handler.Print != nil {
- p.handler.Print(p.Rune())
- }
- p.state = parser.GroundState
- p.paramsLen = 0
- return parser.PrintAction
- }
- func (p *Parser) advance(b byte) parser.Action {
- state, action := parser.Table.Transition(p.state, b)
- // We need to clear the parser state if the state changes from EscapeState.
- // This is because when we enter the EscapeState, we don't get a chance to
- // clear the parser state. For example, when a sequence terminates with a
- // ST (\x1b\\ or \x9c), we dispatch the current sequence and transition to
- // EscapeState. However, the parser state is not cleared in this case and
- // we need to clear it here before dispatching the esc sequence.
- if p.state != state {
- if p.state == parser.EscapeState {
- p.performAction(parser.ClearAction, state, b)
- }
- if action == parser.PutAction &&
- p.state == parser.DcsEntryState && state == parser.DcsStringState {
- // XXX: This is a special case where we need to start collecting
- // non-string parameterized data i.e. doesn't follow the ECMA-48 §
- // 5.4.1 string parameters format.
- p.performAction(parser.StartAction, state, 0)
- }
- }
- // Handle special cases
- switch {
- case b == ESC && p.state == parser.EscapeState:
- // Two ESCs in a row
- p.performAction(parser.ExecuteAction, state, b)
- default:
- p.performAction(action, state, b)
- }
- p.state = state
- return action
- }
- func (p *Parser) parseStringCmd() {
- // Try to parse the command
- datalen := len(p.data)
- if p.dataLen >= 0 {
- datalen = p.dataLen
- }
- for i := 0; i < datalen; i++ {
- d := p.data[i]
- if d < '0' || d > '9' {
- break
- }
- if p.cmd == parser.MissingCommand {
- p.cmd = 0
- }
- p.cmd *= 10
- p.cmd += int(d - '0')
- }
- }
- func (p *Parser) performAction(action parser.Action, state parser.State, b byte) {
- switch action {
- case parser.IgnoreAction:
- break
- case parser.ClearAction:
- p.clear()
- case parser.PrintAction:
- p.cmd = int(b)
- if p.handler.Print != nil {
- p.handler.Print(rune(b))
- }
- case parser.ExecuteAction:
- p.cmd = int(b)
- if p.handler.Execute != nil {
- p.handler.Execute(b)
- }
- case parser.PrefixAction:
- // Collect private prefix
- // we only store the last prefix
- p.cmd &^= 0xff << parser.PrefixShift
- p.cmd |= int(b) << parser.PrefixShift
- case parser.CollectAction:
- if state == parser.Utf8State {
- // Reset the UTF-8 counter
- p.paramsLen = 0
- p.collectRune(b)
- } else {
- // Collect intermediate bytes
- // we only store the last intermediate byte
- p.cmd &^= 0xff << parser.IntermedShift
- p.cmd |= int(b) << parser.IntermedShift
- }
- case parser.ParamAction:
- // Collect parameters
- if p.paramsLen >= len(p.params) {
- break
- }
- if b >= '0' && b <= '9' {
- if p.params[p.paramsLen] == parser.MissingParam {
- p.params[p.paramsLen] = 0
- }
- p.params[p.paramsLen] *= 10
- p.params[p.paramsLen] += int(b - '0')
- }
- if b == ':' {
- p.params[p.paramsLen] |= parser.HasMoreFlag
- }
- if b == ';' || b == ':' {
- p.paramsLen++
- if p.paramsLen < len(p.params) {
- p.params[p.paramsLen] = parser.MissingParam
- }
- }
- case parser.StartAction:
- if p.dataLen < 0 && p.data != nil {
- p.data = p.data[:0]
- } else {
- p.dataLen = 0
- }
- if p.state >= parser.DcsEntryState && p.state <= parser.DcsStringState {
- // Collect the command byte for DCS
- p.cmd |= int(b)
- } else {
- p.cmd = parser.MissingCommand
- }
- case parser.PutAction:
- switch p.state {
- case parser.OscStringState:
- if b == ';' && p.cmd == parser.MissingCommand {
- p.parseStringCmd()
- }
- }
- if p.dataLen < 0 {
- p.data = append(p.data, b)
- } else {
- if p.dataLen < len(p.data) {
- p.data[p.dataLen] = b
- p.dataLen++
- }
- }
- case parser.DispatchAction:
- // Increment the last parameter
- if p.paramsLen > 0 && p.paramsLen < len(p.params)-1 ||
- p.paramsLen == 0 && len(p.params) > 0 && p.params[0] != parser.MissingParam {
- p.paramsLen++
- }
- if p.state == parser.OscStringState && p.cmd == parser.MissingCommand {
- // Ensure we have a command for OSC
- p.parseStringCmd()
- }
- data := p.data
- if p.dataLen >= 0 {
- data = data[:p.dataLen]
- }
- switch p.state {
- case parser.CsiEntryState, parser.CsiParamState, parser.CsiIntermediateState:
- p.cmd |= int(b)
- if p.handler.HandleCsi != nil {
- p.handler.HandleCsi(Cmd(p.cmd), p.Params())
- }
- case parser.EscapeState, parser.EscapeIntermediateState:
- p.cmd |= int(b)
- if p.handler.HandleEsc != nil {
- p.handler.HandleEsc(Cmd(p.cmd))
- }
- case parser.DcsEntryState, parser.DcsParamState, parser.DcsIntermediateState, parser.DcsStringState:
- if p.handler.HandleDcs != nil {
- p.handler.HandleDcs(Cmd(p.cmd), p.Params(), data)
- }
- case parser.OscStringState:
- if p.handler.HandleOsc != nil {
- p.handler.HandleOsc(p.cmd, data)
- }
- case parser.SosStringState:
- if p.handler.HandleSos != nil {
- p.handler.HandleSos(data)
- }
- case parser.PmStringState:
- if p.handler.HandlePm != nil {
- p.handler.HandlePm(data)
- }
- case parser.ApcStringState:
- if p.handler.HandleApc != nil {
- p.handler.HandleApc(data)
- }
- }
- }
- }
// utf8ByteLen reports how many bytes a UTF-8 sequence starting with lead
// byte b occupies, judging by the high bits of b alone. It returns -1 when b
// cannot start a sequence (0x80-0xBF and 0xF8-0xFF).
func utf8ByteLen(b byte) int {
	switch {
	case b&0x80 == 0x00: // 0xxxxxxx: 0x00-0x7F
		return 1
	case b&0xE0 == 0xC0: // 110xxxxx: 0xC0-0xDF
		return 2
	case b&0xF0 == 0xE0: // 1110xxxx: 0xE0-0xEF
		return 3
	case b&0xF8 == 0xF0: // 11110xxx: 0xF0-0xF7
		return 4
	default:
		return -1
	}
}
|