parser.go 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357
  1. package ansi
  2. import (
  3. "unicode/utf8"
  4. "github.com/charmbracelet/x/ansi/parser"
  5. )
  6. // ParserDispatcher is a function that dispatches a sequence.
  7. type ParserDispatcher func(Sequence)
  8. // Parser represents a DEC ANSI compatible sequence parser.
  9. //
  10. // It uses a state machine to parse ANSI escape sequences and control
  11. // characters. The parser is designed to be used with a terminal emulator or
  12. // similar application that needs to parse ANSI escape sequences and control
  13. // characters.
  14. // See package [parser] for more information.
  15. //
  16. //go:generate go run ./gen.go
  17. type Parser struct {
  18. // Params contains the raw parameters of the sequence.
  19. // These parameters used when constructing CSI and DCS sequences.
  20. Params []int
  21. // Data contains the raw data of the sequence.
  22. // These data used when constructing OSC, DCS, SOS, PM, and APC sequences.
  23. Data []byte
  24. // DataLen keeps track of the length of the data buffer.
  25. // If DataLen is -1, the data buffer is unlimited and will grow as needed.
  26. // Otherwise, DataLen is limited by the size of the Data buffer.
  27. DataLen int
  28. // ParamsLen keeps track of the number of parameters.
  29. // This is limited by the size of the Params buffer.
  30. ParamsLen int
  31. // Cmd contains the raw command along with the private marker and
  32. // intermediate bytes of the sequence.
  33. // The first lower byte contains the command byte, the next byte contains
  34. // the private marker, and the next byte contains the intermediate byte.
  35. Cmd int
  36. // RuneLen keeps track of the number of bytes collected for a UTF-8 rune.
  37. RuneLen int
  38. // RuneBuf contains the bytes collected for a UTF-8 rune.
  39. RuneBuf [utf8.MaxRune]byte
  40. // State is the current state of the parser.
  41. State byte
  42. }
  43. // NewParser returns a new parser with the given sizes allocated.
  44. // If dataSize is zero, the underlying data buffer will be unlimited and will
  45. // grow as needed.
  46. func NewParser(paramsSize, dataSize int) *Parser {
  47. s := &Parser{
  48. Params: make([]int, paramsSize),
  49. Data: make([]byte, dataSize),
  50. }
  51. if dataSize <= 0 {
  52. s.DataLen = -1
  53. }
  54. return s
  55. }
  56. // Reset resets the parser to its initial state.
  57. func (p *Parser) Reset() {
  58. p.clear()
  59. p.State = parser.GroundState
  60. }
  61. // clear clears the parser parameters and command.
  62. func (p *Parser) clear() {
  63. if len(p.Params) > 0 {
  64. p.Params[0] = parser.MissingParam
  65. }
  66. p.ParamsLen = 0
  67. p.Cmd = 0
  68. p.RuneLen = 0
  69. }
  70. // StateName returns the name of the current state.
  71. func (p *Parser) StateName() string {
  72. return parser.StateNames[p.State]
  73. }
  74. // Parse parses the given dispatcher and byte buffer.
  75. func (p *Parser) Parse(dispatcher ParserDispatcher, b []byte) {
  76. for i := 0; i < len(b); i++ {
  77. p.Advance(dispatcher, b[i], i < len(b)-1)
  78. }
  79. }
  80. // Advance advances the parser with the given dispatcher and byte.
  81. func (p *Parser) Advance(dispatcher ParserDispatcher, b byte, more bool) parser.Action {
  82. switch p.State {
  83. case parser.Utf8State:
  84. // We handle UTF-8 here.
  85. return p.advanceUtf8(dispatcher, b)
  86. default:
  87. return p.advance(dispatcher, b, more)
  88. }
  89. }
  90. func (p *Parser) collectRune(b byte) {
  91. if p.RuneLen < utf8.UTFMax {
  92. p.RuneBuf[p.RuneLen] = b
  93. p.RuneLen++
  94. }
  95. }
  96. func (p *Parser) advanceUtf8(dispatcher ParserDispatcher, b byte) parser.Action {
  97. // Collect UTF-8 rune bytes.
  98. p.collectRune(b)
  99. rw := utf8ByteLen(p.RuneBuf[0])
  100. if rw == -1 {
  101. // We panic here because the first byte comes from the state machine,
  102. // if this panics, it means there is a bug in the state machine!
  103. panic("invalid rune") // unreachable
  104. }
  105. if p.RuneLen < rw {
  106. return parser.NoneAction
  107. }
  108. // We have enough bytes to decode the rune
  109. bts := p.RuneBuf[:rw]
  110. r, _ := utf8.DecodeRune(bts)
  111. if dispatcher != nil {
  112. dispatcher(Rune(r))
  113. }
  114. p.State = parser.GroundState
  115. p.RuneLen = 0
  116. return parser.NoneAction
  117. }
  118. func (p *Parser) advance(d ParserDispatcher, b byte, more bool) parser.Action {
  119. state, action := parser.Table.Transition(p.State, b)
  120. // We need to clear the parser state if the state changes from EscapeState.
  121. // This is because when we enter the EscapeState, we don't get a chance to
  122. // clear the parser state. For example, when a sequence terminates with a
  123. // ST (\x1b\\ or \x9c), we dispatch the current sequence and transition to
  124. // EscapeState. However, the parser state is not cleared in this case and
  125. // we need to clear it here before dispatching the esc sequence.
  126. if p.State != state {
  127. switch p.State {
  128. case parser.EscapeState:
  129. p.performAction(d, parser.ClearAction, b)
  130. }
  131. if action == parser.PutAction &&
  132. p.State == parser.DcsEntryState && state == parser.DcsStringState {
  133. // XXX: This is a special case where we need to start collecting
  134. // non-string parameterized data i.e. doesn't follow the ECMA-48 §
  135. // 5.4.1 string parameters format.
  136. p.performAction(d, parser.StartAction, 0)
  137. }
  138. }
  139. // Handle special cases
  140. switch {
  141. case b == ESC && p.State == parser.EscapeState:
  142. // Two ESCs in a row
  143. p.performAction(d, parser.ExecuteAction, b)
  144. if !more {
  145. // Two ESCs at the end of the buffer
  146. p.performAction(d, parser.ExecuteAction, b)
  147. }
  148. case b == ESC && !more:
  149. // Last byte is an ESC
  150. p.performAction(d, parser.ExecuteAction, b)
  151. case p.State == parser.EscapeState && b == 'P' && !more:
  152. // ESC P (DCS) at the end of the buffer
  153. p.performAction(d, parser.DispatchAction, b)
  154. case p.State == parser.EscapeState && b == 'X' && !more:
  155. // ESC X (SOS) at the end of the buffer
  156. p.performAction(d, parser.DispatchAction, b)
  157. case p.State == parser.EscapeState && b == '[' && !more:
  158. // ESC [ (CSI) at the end of the buffer
  159. p.performAction(d, parser.DispatchAction, b)
  160. case p.State == parser.EscapeState && b == ']' && !more:
  161. // ESC ] (OSC) at the end of the buffer
  162. p.performAction(d, parser.DispatchAction, b)
  163. case p.State == parser.EscapeState && b == '^' && !more:
  164. // ESC ^ (PM) at the end of the buffer
  165. p.performAction(d, parser.DispatchAction, b)
  166. case p.State == parser.EscapeState && b == '_' && !more:
  167. // ESC _ (APC) at the end of the buffer
  168. p.performAction(d, parser.DispatchAction, b)
  169. default:
  170. p.performAction(d, action, b)
  171. }
  172. p.State = state
  173. return action
  174. }
  175. func (p *Parser) performAction(dispatcher ParserDispatcher, action parser.Action, b byte) {
  176. switch action {
  177. case parser.IgnoreAction:
  178. break
  179. case parser.ClearAction:
  180. p.clear()
  181. case parser.PrintAction:
  182. if utf8ByteLen(b) > 1 {
  183. p.collectRune(b)
  184. } else if dispatcher != nil {
  185. dispatcher(Rune(b))
  186. }
  187. case parser.ExecuteAction:
  188. if dispatcher != nil {
  189. dispatcher(ControlCode(b))
  190. }
  191. case parser.MarkerAction:
  192. // Collect private marker
  193. // we only store the last marker
  194. p.Cmd &^= 0xff << parser.MarkerShift
  195. p.Cmd |= int(b) << parser.MarkerShift
  196. case parser.CollectAction:
  197. // Collect intermediate bytes
  198. // we only store the last intermediate byte
  199. p.Cmd &^= 0xff << parser.IntermedShift
  200. p.Cmd |= int(b) << parser.IntermedShift
  201. case parser.ParamAction:
  202. // Collect parameters
  203. if p.ParamsLen >= len(p.Params) {
  204. break
  205. }
  206. if b >= '0' && b <= '9' {
  207. if p.Params[p.ParamsLen] == parser.MissingParam {
  208. p.Params[p.ParamsLen] = 0
  209. }
  210. p.Params[p.ParamsLen] *= 10
  211. p.Params[p.ParamsLen] += int(b - '0')
  212. }
  213. if b == ':' {
  214. p.Params[p.ParamsLen] |= parser.HasMoreFlag
  215. }
  216. if b == ';' || b == ':' {
  217. p.ParamsLen++
  218. if p.ParamsLen < len(p.Params) {
  219. p.Params[p.ParamsLen] = parser.MissingParam
  220. }
  221. }
  222. case parser.StartAction:
  223. if p.DataLen < 0 {
  224. p.Data = make([]byte, 0)
  225. } else {
  226. p.DataLen = 0
  227. }
  228. if p.State >= parser.DcsEntryState && p.State <= parser.DcsStringState {
  229. // Collect the command byte for DCS
  230. p.Cmd |= int(b)
  231. } else {
  232. p.Cmd = parser.MissingCommand
  233. }
  234. case parser.PutAction:
  235. switch p.State {
  236. case parser.OscStringState:
  237. if b == ';' && p.Cmd == parser.MissingCommand {
  238. // Try to parse the command
  239. datalen := len(p.Data)
  240. if p.DataLen >= 0 {
  241. datalen = p.DataLen
  242. }
  243. for i := 0; i < datalen; i++ {
  244. d := p.Data[i]
  245. if d < '0' || d > '9' {
  246. break
  247. }
  248. if p.Cmd == parser.MissingCommand {
  249. p.Cmd = 0
  250. }
  251. p.Cmd *= 10
  252. p.Cmd += int(d - '0')
  253. }
  254. }
  255. }
  256. if p.DataLen < 0 {
  257. p.Data = append(p.Data, b)
  258. } else {
  259. if p.DataLen < len(p.Data) {
  260. p.Data[p.DataLen] = b
  261. p.DataLen++
  262. }
  263. }
  264. case parser.DispatchAction:
  265. // Increment the last parameter
  266. if p.ParamsLen > 0 && p.ParamsLen < len(p.Params)-1 ||
  267. p.ParamsLen == 0 && len(p.Params) > 0 && p.Params[0] != parser.MissingParam {
  268. p.ParamsLen++
  269. }
  270. if dispatcher == nil {
  271. break
  272. }
  273. var seq Sequence
  274. data := p.Data
  275. if p.DataLen >= 0 {
  276. data = data[:p.DataLen]
  277. }
  278. switch p.State {
  279. case parser.CsiEntryState, parser.CsiParamState, parser.CsiIntermediateState:
  280. p.Cmd |= int(b)
  281. seq = CsiSequence{Cmd: p.Cmd, Params: p.Params[:p.ParamsLen]}
  282. case parser.EscapeState, parser.EscapeIntermediateState:
  283. p.Cmd |= int(b)
  284. seq = EscSequence(p.Cmd)
  285. case parser.DcsEntryState, parser.DcsParamState, parser.DcsIntermediateState, parser.DcsStringState:
  286. seq = DcsSequence{Cmd: p.Cmd, Params: p.Params[:p.ParamsLen], Data: data}
  287. case parser.OscStringState:
  288. seq = OscSequence{Cmd: p.Cmd, Data: data}
  289. case parser.SosStringState:
  290. seq = SosSequence{Data: data}
  291. case parser.PmStringState:
  292. seq = PmSequence{Data: data}
  293. case parser.ApcStringState:
  294. seq = ApcSequence{Data: data}
  295. }
  296. dispatcher(seq)
  297. }
  298. }
  299. func utf8ByteLen(b byte) int {
  300. if b <= 0b0111_1111 { // 0x00-0x7F
  301. return 1
  302. } else if b >= 0b1100_0000 && b <= 0b1101_1111 { // 0xC0-0xDF
  303. return 2
  304. } else if b >= 0b1110_0000 && b <= 0b1110_1111 { // 0xE0-0xEF
  305. return 3
  306. } else if b >= 0b1111_0000 && b <= 0b1111_0111 { // 0xF0-0xF7
  307. return 4
  308. }
  309. return -1
  310. }