parser.go 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417
  1. package ansi
  2. import (
  3. "unicode/utf8"
  4. "unsafe"
  5. "github.com/charmbracelet/x/ansi/parser"
  6. )
// Parser represents a DEC ANSI compatible sequence parser.
//
// It uses a state machine to parse ANSI escape sequences and control
// characters. The parser is designed to be used with a terminal emulator or
// a similar application that needs to parse ANSI escape sequences and control
// characters.
// See package [parser] for more information.
//
//go:generate go run ./gen.go
type Parser struct {
	// handler holds the callbacks invoked when a sequence, control code, or
	// rune is dispatched.
	handler Handler

	// params contains the raw parameters of the sequence.
	// These parameters are used when constructing CSI and DCS sequences.
	params []int

	// data contains the raw data of the sequence.
	// This data is used when constructing OSC, DCS, SOS, PM, and APC
	// sequences.
	data []byte

	// dataLen keeps track of the length of the data buffer.
	// If dataLen is -1, the data buffer is unlimited and will grow as needed.
	// Otherwise, dataLen counts the bytes in use and is capped by the size of
	// the data buffer.
	dataLen int

	// paramsLen keeps track of the number of parameters.
	// This is limited by the size of the params buffer.
	//
	// This is also used when collecting UTF-8 runes to keep track of the
	// number of rune bytes collected.
	paramsLen int

	// cmd contains the raw command along with the private prefix and
	// intermediate bytes of the sequence.
	// The lowest byte contains the command byte, the next byte contains
	// the private prefix, and the next byte contains the intermediate byte.
	//
	// This is also used when collecting UTF-8 runes, in which case its low
	// four bytes are treated as a 4-byte buffer (first rune byte in the
	// lowest byte).
	cmd int

	// state is the current state of the parser.
	state byte
}
  45. // NewParser returns a new parser with the default settings.
  46. // The [Parser] uses a default size of 32 for the parameters and 64KB for the
  47. // data buffer. Use [Parser.SetParamsSize] and [Parser.SetDataSize] to set the
  48. // size of the parameters and data buffer respectively.
  49. func NewParser() *Parser {
  50. p := new(Parser)
  51. p.SetParamsSize(parser.MaxParamsSize)
  52. p.SetDataSize(1024 * 64) // 64KB data buffer
  53. return p
  54. }
  55. // SetParamsSize sets the size of the parameters buffer.
  56. // This is used when constructing CSI and DCS sequences.
  57. func (p *Parser) SetParamsSize(size int) {
  58. p.params = make([]int, size)
  59. }
  60. // SetDataSize sets the size of the data buffer.
  61. // This is used when constructing OSC, DCS, SOS, PM, and APC sequences.
  62. // If size is less than or equal to 0, the data buffer is unlimited and will
  63. // grow as needed.
  64. func (p *Parser) SetDataSize(size int) {
  65. if size <= 0 {
  66. size = 0
  67. p.dataLen = -1
  68. }
  69. p.data = make([]byte, size)
  70. }
  71. // Params returns the list of parsed packed parameters.
  72. func (p *Parser) Params() Params {
  73. return unsafe.Slice((*Param)(unsafe.Pointer(&p.params[0])), p.paramsLen)
  74. }
  75. // Param returns the parameter at the given index and falls back to the default
  76. // value if the parameter is missing. If the index is out of bounds, it returns
  77. // the default value and false.
  78. func (p *Parser) Param(i, def int) (int, bool) {
  79. if i < 0 || i >= p.paramsLen {
  80. return def, false
  81. }
  82. return Param(p.params[i]).Param(def), true
  83. }
// Command returns the packed command of the last dispatched sequence. Use
// [Cmd] to unpack the command.
//
// The value is the parser's raw cmd field; the same field is reused while
// collecting UTF-8 rune bytes, so it is only meaningful as a command after a
// sequence has been dispatched.
func (p *Parser) Command() int {
	return p.cmd
}
  89. // Rune returns the last dispatched sequence as a rune.
  90. func (p *Parser) Rune() rune {
  91. rw := utf8ByteLen(byte(p.cmd & 0xff))
  92. if rw == -1 {
  93. return utf8.RuneError
  94. }
  95. r, _ := utf8.DecodeRune((*[utf8.UTFMax]byte)(unsafe.Pointer(&p.cmd))[:rw])
  96. return r
  97. }
  98. // Control returns the last dispatched sequence as a control code.
  99. func (p *Parser) Control() byte {
  100. return byte(p.cmd & 0xff)
  101. }
  102. // Data returns the raw data of the last dispatched sequence.
  103. func (p *Parser) Data() []byte {
  104. return p.data[:p.dataLen]
  105. }
  106. // Reset resets the parser to its initial state.
  107. func (p *Parser) Reset() {
  108. p.clear()
  109. p.state = parser.GroundState
  110. }
  111. // clear clears the parser parameters and command.
  112. func (p *Parser) clear() {
  113. if len(p.params) > 0 {
  114. p.params[0] = parser.MissingParam
  115. }
  116. p.paramsLen = 0
  117. p.cmd = 0
  118. }
// State returns the current state of the parser, i.e. the state the state
// machine will use to interpret the next byte passed to [Parser.Advance].
func (p *Parser) State() parser.State {
	return p.state
}
// StateName returns the name of the current state, looked up in
// parser.StateNames using the current state as the key.
func (p *Parser) StateName() string {
	return parser.StateNames[p.state]
}
  127. // Parse parses the given dispatcher and byte buffer.
  128. // Deprecated: Loop over the buffer and call [Parser.Advance] instead.
  129. func (p *Parser) Parse(b []byte) {
  130. for i := 0; i < len(b); i++ {
  131. p.Advance(b[i])
  132. }
  133. }
  134. // Advance advances the parser using the given byte. It returns the action
  135. // performed by the parser.
  136. func (p *Parser) Advance(b byte) parser.Action {
  137. switch p.state {
  138. case parser.Utf8State:
  139. // We handle UTF-8 here.
  140. return p.advanceUtf8(b)
  141. default:
  142. return p.advance(b)
  143. }
  144. }
  145. func (p *Parser) collectRune(b byte) {
  146. if p.paramsLen >= utf8.UTFMax {
  147. return
  148. }
  149. shift := p.paramsLen * 8
  150. p.cmd &^= 0xff << shift
  151. p.cmd |= int(b) << shift
  152. p.paramsLen++
  153. }
  154. func (p *Parser) advanceUtf8(b byte) parser.Action {
  155. // Collect UTF-8 rune bytes.
  156. p.collectRune(b)
  157. rw := utf8ByteLen(byte(p.cmd & 0xff))
  158. if rw == -1 {
  159. // We panic here because the first byte comes from the state machine,
  160. // if this panics, it means there is a bug in the state machine!
  161. panic("invalid rune") // unreachable
  162. }
  163. if p.paramsLen < rw {
  164. return parser.CollectAction
  165. }
  166. // We have enough bytes to decode the rune using unsafe
  167. if p.handler.Print != nil {
  168. p.handler.Print(p.Rune())
  169. }
  170. p.state = parser.GroundState
  171. p.paramsLen = 0
  172. return parser.PrintAction
  173. }
  174. func (p *Parser) advance(b byte) parser.Action {
  175. state, action := parser.Table.Transition(p.state, b)
  176. // We need to clear the parser state if the state changes from EscapeState.
  177. // This is because when we enter the EscapeState, we don't get a chance to
  178. // clear the parser state. For example, when a sequence terminates with a
  179. // ST (\x1b\\ or \x9c), we dispatch the current sequence and transition to
  180. // EscapeState. However, the parser state is not cleared in this case and
  181. // we need to clear it here before dispatching the esc sequence.
  182. if p.state != state {
  183. if p.state == parser.EscapeState {
  184. p.performAction(parser.ClearAction, state, b)
  185. }
  186. if action == parser.PutAction &&
  187. p.state == parser.DcsEntryState && state == parser.DcsStringState {
  188. // XXX: This is a special case where we need to start collecting
  189. // non-string parameterized data i.e. doesn't follow the ECMA-48 §
  190. // 5.4.1 string parameters format.
  191. p.performAction(parser.StartAction, state, 0)
  192. }
  193. }
  194. // Handle special cases
  195. switch {
  196. case b == ESC && p.state == parser.EscapeState:
  197. // Two ESCs in a row
  198. p.performAction(parser.ExecuteAction, state, b)
  199. default:
  200. p.performAction(action, state, b)
  201. }
  202. p.state = state
  203. return action
  204. }
  205. func (p *Parser) parseStringCmd() {
  206. // Try to parse the command
  207. datalen := len(p.data)
  208. if p.dataLen >= 0 {
  209. datalen = p.dataLen
  210. }
  211. for i := 0; i < datalen; i++ {
  212. d := p.data[i]
  213. if d < '0' || d > '9' {
  214. break
  215. }
  216. if p.cmd == parser.MissingCommand {
  217. p.cmd = 0
  218. }
  219. p.cmd *= 10
  220. p.cmd += int(d - '0')
  221. }
  222. }
// performAction applies a single state-machine action to the parser.
//
// action is the action to perform, state is the state the parser is moving
// into, and b is the input byte that triggered the action. When called from
// advance, p.state still holds the state being left (advance updates it only
// after this call), so every check against p.state below refers to the
// previous state.
func (p *Parser) performAction(action parser.Action, state parser.State, b byte) {
	switch action {
	case parser.IgnoreAction:
		// Nothing to do.
		break

	case parser.ClearAction:
		p.clear()

	case parser.PrintAction:
		// Remember the byte and hand it to the Print handler as a rune.
		p.cmd = int(b)
		if p.handler.Print != nil {
			p.handler.Print(rune(b))
		}

	case parser.ExecuteAction:
		// Remember the byte and hand it to the Execute handler.
		p.cmd = int(b)
		if p.handler.Execute != nil {
			p.handler.Execute(b)
		}

	case parser.PrefixAction:
		// Collect private prefix
		// we only store the last prefix
		p.cmd &^= 0xff << parser.PrefixShift
		p.cmd |= int(b) << parser.PrefixShift

	case parser.CollectAction:
		if state == parser.Utf8State {
			// Reset the UTF-8 counter and store b as the first rune byte.
			p.paramsLen = 0
			p.collectRune(b)
		} else {
			// Collect intermediate bytes
			// we only store the last intermediate byte
			p.cmd &^= 0xff << parser.IntermedShift
			p.cmd |= int(b) << parser.IntermedShift
		}

	case parser.ParamAction:
		// Collect parameters. Once the params buffer is full, further
		// parameter bytes are dropped.
		if p.paramsLen >= len(p.params) {
			break
		}

		if b >= '0' && b <= '9' {
			// The first digit replaces the "missing" marker; every digit is
			// then accumulated as a decimal number.
			// NOTE(review): the accumulation is unbounded, so a very long
			// digit run can overflow — confirm whether callers rely on a cap.
			if p.params[p.paramsLen] == parser.MissingParam {
				p.params[p.paramsLen] = 0
			}

			p.params[p.paramsLen] *= 10
			p.params[p.paramsLen] += int(b - '0')
		}

		if b == ':' {
			// Flag the current parameter with parser.HasMoreFlag.
			p.params[p.paramsLen] |= parser.HasMoreFlag
		}

		if b == ';' || b == ':' {
			// A separator ends the current parameter; the next slot (if any)
			// starts out as missing.
			p.paramsLen++
			if p.paramsLen < len(p.params) {
				p.params[p.paramsLen] = parser.MissingParam
			}
		}

	case parser.StartAction:
		// Reset the data buffer: in unlimited mode (negative dataLen) the
		// slice is truncated, otherwise the length counter is zeroed.
		if p.dataLen < 0 && p.data != nil {
			p.data = p.data[:0]
		} else {
			p.dataLen = 0
		}
		if p.state >= parser.DcsEntryState && p.state <= parser.DcsStringState {
			// Collect the command byte for DCS
			p.cmd |= int(b)
		} else {
			// Other string sequences start without a command.
			p.cmd = parser.MissingCommand
		}

	case parser.PutAction:
		switch p.state {
		case parser.OscStringState:
			// On the first ';' of an OSC string, parse the digits collected
			// so far as the command.
			if b == ';' && p.cmd == parser.MissingCommand {
				p.parseStringCmd()
			}
		}

		// Append b to the data: unlimited buffers grow, bounded buffers drop
		// bytes once they are full.
		if p.dataLen < 0 {
			p.data = append(p.data, b)
		} else {
			if p.dataLen < len(p.data) {
				p.data[p.dataLen] = b
				p.dataLen++
			}
		}

	case parser.DispatchAction:
		// Increment the last parameter so that the parameter being collected
		// when the sequence ended is counted as well.
		// NOTE(review): because of the `len(p.params)-1` bound, the trailing
		// parameter is not counted when paramsLen == len(p.params)-1 —
		// confirm this is intended.
		if p.paramsLen > 0 && p.paramsLen < len(p.params)-1 ||
			p.paramsLen == 0 && len(p.params) > 0 && p.params[0] != parser.MissingParam {
			p.paramsLen++
		}

		if p.state == parser.OscStringState && p.cmd == parser.MissingCommand {
			// Ensure we have a command for OSC
			p.parseStringCmd()
		}

		// In bounded mode only the first dataLen bytes hold collected data;
		// in unlimited mode the whole slice does.
		data := p.data
		if p.dataLen >= 0 {
			data = data[:p.dataLen]
		}
		// Route the sequence to the handler matching the state being left.
		switch p.state {
		case parser.CsiEntryState, parser.CsiParamState, parser.CsiIntermediateState:
			// b is the final byte and becomes the command byte.
			p.cmd |= int(b)
			if p.handler.HandleCsi != nil {
				p.handler.HandleCsi(Cmd(p.cmd), p.Params())
			}
		case parser.EscapeState, parser.EscapeIntermediateState:
			// b is the final byte and becomes the command byte.
			p.cmd |= int(b)
			if p.handler.HandleEsc != nil {
				p.handler.HandleEsc(Cmd(p.cmd))
			}
		case parser.DcsEntryState, parser.DcsParamState, parser.DcsIntermediateState, parser.DcsStringState:
			if p.handler.HandleDcs != nil {
				p.handler.HandleDcs(Cmd(p.cmd), p.Params(), data)
			}
		case parser.OscStringState:
			if p.handler.HandleOsc != nil {
				p.handler.HandleOsc(p.cmd, data)
			}
		case parser.SosStringState:
			if p.handler.HandleSos != nil {
				p.handler.HandleSos(data)
			}
		case parser.PmStringState:
			if p.handler.HandlePm != nil {
				p.handler.HandlePm(data)
			}
		case parser.ApcStringState:
			if p.handler.HandleApc != nil {
				p.handler.HandleApc(data)
			}
		}
	}
}
  351. func utf8ByteLen(b byte) int {
  352. if b <= 0b0111_1111 { // 0x00-0x7F
  353. return 1
  354. } else if b >= 0b1100_0000 && b <= 0b1101_1111 { // 0xC0-0xDF
  355. return 2
  356. } else if b >= 0b1110_0000 && b <= 0b1110_1111 { // 0xE0-0xEF
  357. return 3
  358. } else if b >= 0b1111_0000 && b <= 0b1111_0111 { // 0xF0-0xF7
  359. return 4
  360. }
  361. return -1
  362. }