| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357 |
- package ansi
- import (
- "unicode/utf8"
- "github.com/charmbracelet/x/ansi/parser"
- )
- // ParserDispatcher is a function that dispatches a sequence.
- type ParserDispatcher func(Sequence)
// Parser represents a DEC ANSI compatible sequence parser.
//
// It uses a state machine to parse ANSI escape sequences and control
// characters. The parser is designed to be used with a terminal emulator or
// similar application that needs to parse ANSI escape sequences and control
// characters.
// See package [parser] for more information.
//
//go:generate go run ./gen.go
type Parser struct {
	// Params contains the raw parameters of the sequence.
	// These parameters are used when constructing CSI and DCS sequences.
	Params []int

	// Data contains the raw data of the sequence.
	// This data is used when constructing OSC, DCS, SOS, PM, and APC
	// sequences.
	Data []byte

	// DataLen keeps track of the length of the data buffer.
	// If DataLen is -1, the data buffer is unlimited and will grow as needed.
	// Otherwise, DataLen is limited by the size of the Data buffer.
	DataLen int

	// ParamsLen keeps track of the number of parameters.
	// This is limited by the size of the Params buffer.
	ParamsLen int

	// Cmd contains the raw command along with the private marker and
	// intermediate bytes of the sequence.
	// The first lower byte contains the command byte, the next byte contains
	// the private marker, and the next byte contains the intermediate byte.
	Cmd int

	// RuneLen keeps track of the number of bytes collected for a UTF-8 rune.
	RuneLen int

	// RuneBuf contains the bytes collected for a UTF-8 rune.
	// It is sized by utf8.UTFMax, the maximum number of bytes in one encoded
	// rune. (utf8.MaxRune is the largest code point, not a byte count, and
	// would make this array roughly 1 MiB.)
	RuneBuf [utf8.UTFMax]byte

	// State is the current state of the parser.
	State byte
}
- // NewParser returns a new parser with the given sizes allocated.
- // If dataSize is zero, the underlying data buffer will be unlimited and will
- // grow as needed.
- func NewParser(paramsSize, dataSize int) *Parser {
- s := &Parser{
- Params: make([]int, paramsSize),
- Data: make([]byte, dataSize),
- }
- if dataSize <= 0 {
- s.DataLen = -1
- }
- return s
- }
- // Reset resets the parser to its initial state.
- func (p *Parser) Reset() {
- p.clear()
- p.State = parser.GroundState
- }
- // clear clears the parser parameters and command.
- func (p *Parser) clear() {
- if len(p.Params) > 0 {
- p.Params[0] = parser.MissingParam
- }
- p.ParamsLen = 0
- p.Cmd = 0
- p.RuneLen = 0
- }
- // StateName returns the name of the current state.
- func (p *Parser) StateName() string {
- return parser.StateNames[p.State]
- }
- // Parse parses the given dispatcher and byte buffer.
- func (p *Parser) Parse(dispatcher ParserDispatcher, b []byte) {
- for i := 0; i < len(b); i++ {
- p.Advance(dispatcher, b[i], i < len(b)-1)
- }
- }
- // Advance advances the parser with the given dispatcher and byte.
- func (p *Parser) Advance(dispatcher ParserDispatcher, b byte, more bool) parser.Action {
- switch p.State {
- case parser.Utf8State:
- // We handle UTF-8 here.
- return p.advanceUtf8(dispatcher, b)
- default:
- return p.advance(dispatcher, b, more)
- }
- }
- func (p *Parser) collectRune(b byte) {
- if p.RuneLen < utf8.UTFMax {
- p.RuneBuf[p.RuneLen] = b
- p.RuneLen++
- }
- }
- func (p *Parser) advanceUtf8(dispatcher ParserDispatcher, b byte) parser.Action {
- // Collect UTF-8 rune bytes.
- p.collectRune(b)
- rw := utf8ByteLen(p.RuneBuf[0])
- if rw == -1 {
- // We panic here because the first byte comes from the state machine,
- // if this panics, it means there is a bug in the state machine!
- panic("invalid rune") // unreachable
- }
- if p.RuneLen < rw {
- return parser.NoneAction
- }
- // We have enough bytes to decode the rune
- bts := p.RuneBuf[:rw]
- r, _ := utf8.DecodeRune(bts)
- if dispatcher != nil {
- dispatcher(Rune(r))
- }
- p.State = parser.GroundState
- p.RuneLen = 0
- return parser.NoneAction
- }
- func (p *Parser) advance(d ParserDispatcher, b byte, more bool) parser.Action {
- state, action := parser.Table.Transition(p.State, b)
- // We need to clear the parser state if the state changes from EscapeState.
- // This is because when we enter the EscapeState, we don't get a chance to
- // clear the parser state. For example, when a sequence terminates with a
- // ST (\x1b\\ or \x9c), we dispatch the current sequence and transition to
- // EscapeState. However, the parser state is not cleared in this case and
- // we need to clear it here before dispatching the esc sequence.
- if p.State != state {
- switch p.State {
- case parser.EscapeState:
- p.performAction(d, parser.ClearAction, b)
- }
- if action == parser.PutAction &&
- p.State == parser.DcsEntryState && state == parser.DcsStringState {
- // XXX: This is a special case where we need to start collecting
- // non-string parameterized data i.e. doesn't follow the ECMA-48 §
- // 5.4.1 string parameters format.
- p.performAction(d, parser.StartAction, 0)
- }
- }
- // Handle special cases
- switch {
- case b == ESC && p.State == parser.EscapeState:
- // Two ESCs in a row
- p.performAction(d, parser.ExecuteAction, b)
- if !more {
- // Two ESCs at the end of the buffer
- p.performAction(d, parser.ExecuteAction, b)
- }
- case b == ESC && !more:
- // Last byte is an ESC
- p.performAction(d, parser.ExecuteAction, b)
- case p.State == parser.EscapeState && b == 'P' && !more:
- // ESC P (DCS) at the end of the buffer
- p.performAction(d, parser.DispatchAction, b)
- case p.State == parser.EscapeState && b == 'X' && !more:
- // ESC X (SOS) at the end of the buffer
- p.performAction(d, parser.DispatchAction, b)
- case p.State == parser.EscapeState && b == '[' && !more:
- // ESC [ (CSI) at the end of the buffer
- p.performAction(d, parser.DispatchAction, b)
- case p.State == parser.EscapeState && b == ']' && !more:
- // ESC ] (OSC) at the end of the buffer
- p.performAction(d, parser.DispatchAction, b)
- case p.State == parser.EscapeState && b == '^' && !more:
- // ESC ^ (PM) at the end of the buffer
- p.performAction(d, parser.DispatchAction, b)
- case p.State == parser.EscapeState && b == '_' && !more:
- // ESC _ (APC) at the end of the buffer
- p.performAction(d, parser.DispatchAction, b)
- default:
- p.performAction(d, action, b)
- }
- p.State = state
- return action
- }
- func (p *Parser) performAction(dispatcher ParserDispatcher, action parser.Action, b byte) {
- switch action {
- case parser.IgnoreAction:
- break
- case parser.ClearAction:
- p.clear()
- case parser.PrintAction:
- if utf8ByteLen(b) > 1 {
- p.collectRune(b)
- } else if dispatcher != nil {
- dispatcher(Rune(b))
- }
- case parser.ExecuteAction:
- if dispatcher != nil {
- dispatcher(ControlCode(b))
- }
- case parser.MarkerAction:
- // Collect private marker
- // we only store the last marker
- p.Cmd &^= 0xff << parser.MarkerShift
- p.Cmd |= int(b) << parser.MarkerShift
- case parser.CollectAction:
- // Collect intermediate bytes
- // we only store the last intermediate byte
- p.Cmd &^= 0xff << parser.IntermedShift
- p.Cmd |= int(b) << parser.IntermedShift
- case parser.ParamAction:
- // Collect parameters
- if p.ParamsLen >= len(p.Params) {
- break
- }
- if b >= '0' && b <= '9' {
- if p.Params[p.ParamsLen] == parser.MissingParam {
- p.Params[p.ParamsLen] = 0
- }
- p.Params[p.ParamsLen] *= 10
- p.Params[p.ParamsLen] += int(b - '0')
- }
- if b == ':' {
- p.Params[p.ParamsLen] |= parser.HasMoreFlag
- }
- if b == ';' || b == ':' {
- p.ParamsLen++
- if p.ParamsLen < len(p.Params) {
- p.Params[p.ParamsLen] = parser.MissingParam
- }
- }
- case parser.StartAction:
- if p.DataLen < 0 {
- p.Data = make([]byte, 0)
- } else {
- p.DataLen = 0
- }
- if p.State >= parser.DcsEntryState && p.State <= parser.DcsStringState {
- // Collect the command byte for DCS
- p.Cmd |= int(b)
- } else {
- p.Cmd = parser.MissingCommand
- }
- case parser.PutAction:
- switch p.State {
- case parser.OscStringState:
- if b == ';' && p.Cmd == parser.MissingCommand {
- // Try to parse the command
- datalen := len(p.Data)
- if p.DataLen >= 0 {
- datalen = p.DataLen
- }
- for i := 0; i < datalen; i++ {
- d := p.Data[i]
- if d < '0' || d > '9' {
- break
- }
- if p.Cmd == parser.MissingCommand {
- p.Cmd = 0
- }
- p.Cmd *= 10
- p.Cmd += int(d - '0')
- }
- }
- }
- if p.DataLen < 0 {
- p.Data = append(p.Data, b)
- } else {
- if p.DataLen < len(p.Data) {
- p.Data[p.DataLen] = b
- p.DataLen++
- }
- }
- case parser.DispatchAction:
- // Increment the last parameter
- if p.ParamsLen > 0 && p.ParamsLen < len(p.Params)-1 ||
- p.ParamsLen == 0 && len(p.Params) > 0 && p.Params[0] != parser.MissingParam {
- p.ParamsLen++
- }
- if dispatcher == nil {
- break
- }
- var seq Sequence
- data := p.Data
- if p.DataLen >= 0 {
- data = data[:p.DataLen]
- }
- switch p.State {
- case parser.CsiEntryState, parser.CsiParamState, parser.CsiIntermediateState:
- p.Cmd |= int(b)
- seq = CsiSequence{Cmd: p.Cmd, Params: p.Params[:p.ParamsLen]}
- case parser.EscapeState, parser.EscapeIntermediateState:
- p.Cmd |= int(b)
- seq = EscSequence(p.Cmd)
- case parser.DcsEntryState, parser.DcsParamState, parser.DcsIntermediateState, parser.DcsStringState:
- seq = DcsSequence{Cmd: p.Cmd, Params: p.Params[:p.ParamsLen], Data: data}
- case parser.OscStringState:
- seq = OscSequence{Cmd: p.Cmd, Data: data}
- case parser.SosStringState:
- seq = SosSequence{Data: data}
- case parser.PmStringState:
- seq = PmSequence{Data: data}
- case parser.ApcStringState:
- seq = ApcSequence{Data: data}
- }
- dispatcher(seq)
- }
- }
// utf8ByteLen reports how many bytes a UTF-8 sequence starting with lead byte
// b occupies, judged by the byte's range alone. It returns -1 when b cannot
// start a sequence (0x80-0xBF and 0xF8-0xFF).
func utf8ByteLen(b byte) int {
	switch {
	case b < 0x80: // 0x00-0x7F
		return 1
	case b >= 0xC0 && b <= 0xDF:
		return 2
	case b >= 0xE0 && b <= 0xEF:
		return 3
	case b >= 0xF0 && b <= 0xF7:
		return 4
	default:
		return -1
	}
}
|