parser.go 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259
// Package parser contains types and functions for parsing Markdown text.
  2. package parser
  3. import (
  4. "fmt"
  5. "strings"
  6. "sync"
  7. "github.com/yuin/goldmark/ast"
  8. "github.com/yuin/goldmark/text"
  9. "github.com/yuin/goldmark/util"
  10. )
// A Reference interface represents a link reference in Markdown text.
type Reference interface {
	// String implements fmt.Stringer.
	String() string

	// Label returns the label of the reference.
	Label() []byte

	// Destination returns the destination (URL) of the reference.
	Destination() []byte

	// Title returns the title of the reference.
	Title() []byte
}
  22. type reference struct {
  23. label []byte
  24. destination []byte
  25. title []byte
  26. }
  27. // NewReference returns a new Reference.
  28. func NewReference(label, destination, title []byte) Reference {
  29. return &reference{label, destination, title}
  30. }
  31. func (r *reference) Label() []byte {
  32. return r.label
  33. }
  34. func (r *reference) Destination() []byte {
  35. return r.destination
  36. }
  37. func (r *reference) Title() []byte {
  38. return r.title
  39. }
  40. func (r *reference) String() string {
  41. return fmt.Sprintf("Reference{Label:%s, Destination:%s, Title:%s}", r.label, r.destination, r.title)
  42. }
// An IDs interface is a collection of element ids.
type IDs interface {
	// Generate generates a new element id from value for a node of the
	// given kind.
	Generate(value []byte, kind ast.NodeKind) []byte

	// Put puts the given element id into the table of used ids.
	Put(value []byte)
}
  50. type ids struct {
  51. values map[string]bool
  52. }
  53. func newIDs() IDs {
  54. return &ids{
  55. values: map[string]bool{},
  56. }
  57. }
  58. func (s *ids) Generate(value []byte, kind ast.NodeKind) []byte {
  59. value = util.TrimLeftSpace(value)
  60. value = util.TrimRightSpace(value)
  61. result := []byte{}
  62. for i := 0; i < len(value); {
  63. v := value[i]
  64. l := util.UTF8Len(v)
  65. i += int(l)
  66. if l != 1 {
  67. continue
  68. }
  69. if util.IsAlphaNumeric(v) {
  70. if 'A' <= v && v <= 'Z' {
  71. v += 'a' - 'A'
  72. }
  73. result = append(result, v)
  74. } else if util.IsSpace(v) || v == '-' || v == '_' {
  75. result = append(result, '-')
  76. }
  77. }
  78. if len(result) == 0 {
  79. if kind == ast.KindHeading {
  80. result = []byte("heading")
  81. } else {
  82. result = []byte("id")
  83. }
  84. }
  85. if _, ok := s.values[util.BytesToReadOnlyString(result)]; !ok {
  86. s.values[util.BytesToReadOnlyString(result)] = true
  87. return result
  88. }
  89. for i := 1; ; i++ {
  90. newResult := fmt.Sprintf("%s-%d", result, i)
  91. if _, ok := s.values[newResult]; !ok {
  92. s.values[newResult] = true
  93. return []byte(newResult)
  94. }
  95. }
  96. }
  97. func (s *ids) Put(value []byte) {
  98. s.values[util.BytesToReadOnlyString(value)] = true
  99. }
// ContextKey is a key that is used to set arbitrary values to the context.
type ContextKey int

// ContextKeyMax is the maximum value of the ContextKey; it is the key most
// recently returned by NewContextKey.
var ContextKeyMax ContextKey
  104. // NewContextKey return a new ContextKey value.
  105. func NewContextKey() ContextKey {
  106. ContextKeyMax++
  107. return ContextKeyMax
  108. }
// A Context interface holds the information that is necessary to parse
// Markdown text.
type Context interface {
	// String implements fmt.Stringer.
	String() string

	// Get returns a value associated with the given key.
	Get(ContextKey) interface{}

	// ComputeIfAbsent computes a value if a value associated with the given
	// key is absent and returns the value.
	ComputeIfAbsent(ContextKey, func() interface{}) interface{}

	// Set sets the given value to the context.
	Set(ContextKey, interface{})

	// AddReference adds the given reference to this context.
	AddReference(Reference)

	// Reference returns (a reference, true) if a reference associated with
	// the given label exists, otherwise (nil, false).
	Reference(label string) (Reference, bool)

	// References returns a list of references.
	References() []Reference

	// IDs returns a collection of the element ids.
	IDs() IDs

	// BlockOffset returns the position of the first non-space character on
	// the current line.
	// This value is valid only for BlockParser.Open.
	// BlockOffset returns -1 if the current line is blank.
	BlockOffset() int

	// SetBlockOffset sets the position of the first non-space character on
	// the current line.
	// This value is valid only for BlockParser.Open.
	SetBlockOffset(int)

	// BlockIndent returns the indent width of the current line.
	// This value is valid only for BlockParser.Open.
	// BlockIndent returns -1 if the current line is blank.
	BlockIndent() int

	// SetBlockIndent sets the indent width of the current line.
	// This value is valid only for BlockParser.Open.
	SetBlockIndent(int)

	// FirstDelimiter returns the first delimiter of the current delimiter list.
	FirstDelimiter() *Delimiter

	// LastDelimiter returns the last delimiter of the current delimiter list.
	LastDelimiter() *Delimiter

	// PushDelimiter appends the given delimiter to the tail of the current
	// delimiter list.
	PushDelimiter(delimiter *Delimiter)

	// RemoveDelimiter removes the given delimiter from the current delimiter list.
	RemoveDelimiter(d *Delimiter)

	// ClearDelimiters clears the current delimiter list.
	ClearDelimiters(bottom ast.Node)

	// OpenedBlocks returns a list of nodes that are currently being parsed.
	OpenedBlocks() []Block

	// SetOpenedBlocks sets a list of nodes that are currently being parsed.
	SetOpenedBlocks([]Block)

	// LastOpenedBlock returns the last node that is currently being parsed.
	LastOpenedBlock() Block

	// IsInLinkLabel returns true if the current position seems to be in a
	// link label.
	IsInLinkLabel() bool
}
// A ContextConfig struct is a data structure that holds configuration of the Context.
type ContextConfig struct {
	// IDs is the element id collection the Context will use.
	IDs IDs
}

// A ContextOption is a functional option type for the Context.
type ContextOption func(*ContextConfig)
  169. // WithIDs is a functional option for the Context.
  170. func WithIDs(ids IDs) ContextOption {
  171. return func(c *ContextConfig) {
  172. c.IDs = ids
  173. }
  174. }
// parseContext is the Context implementation returned by NewContext.
type parseContext struct {
	// store holds the values set through Get/Set, indexed by ContextKey.
	store []interface{}
	// ids is the element id collection returned by IDs.
	ids IDs
	// refs maps a link label key to its Reference.
	refs map[string]Reference
	// blockOffset and blockIndent describe the current line; NewContext
	// initialises both to -1.
	blockOffset int
	blockIndent int
	// delimiters is the head and lastDelimiter the tail of the current
	// delimiter list.
	delimiters    *Delimiter
	lastDelimiter *Delimiter
	// openedBlocks lists the blocks that are currently being parsed.
	openedBlocks []Block
}
  185. // NewContext returns a new Context.
  186. func NewContext(options ...ContextOption) Context {
  187. cfg := &ContextConfig{
  188. IDs: newIDs(),
  189. }
  190. for _, option := range options {
  191. option(cfg)
  192. }
  193. return &parseContext{
  194. store: make([]interface{}, ContextKeyMax+1),
  195. refs: map[string]Reference{},
  196. ids: cfg.IDs,
  197. blockOffset: -1,
  198. blockIndent: -1,
  199. delimiters: nil,
  200. lastDelimiter: nil,
  201. openedBlocks: []Block{},
  202. }
  203. }
  204. func (p *parseContext) Get(key ContextKey) interface{} {
  205. return p.store[key]
  206. }
  207. func (p *parseContext) ComputeIfAbsent(key ContextKey, f func() interface{}) interface{} {
  208. v := p.store[key]
  209. if v == nil {
  210. v = f()
  211. p.store[key] = v
  212. }
  213. return v
  214. }
  215. func (p *parseContext) Set(key ContextKey, value interface{}) {
  216. p.store[key] = value
  217. }
// IDs returns the collection of the element ids.
func (p *parseContext) IDs() IDs {
	return p.ids
}

// BlockOffset returns the stored block offset of the current line.
func (p *parseContext) BlockOffset() int {
	return p.blockOffset
}

// SetBlockOffset stores v as the block offset of the current line.
func (p *parseContext) SetBlockOffset(v int) {
	p.blockOffset = v
}

// BlockIndent returns the stored indent width of the current line.
func (p *parseContext) BlockIndent() int {
	return p.blockIndent
}

// SetBlockIndent stores v as the indent width of the current line.
func (p *parseContext) SetBlockIndent(v int) {
	p.blockIndent = v
}

// LastDelimiter returns the tail of the current delimiter list, or nil if
// the list is empty.
func (p *parseContext) LastDelimiter() *Delimiter {
	return p.lastDelimiter
}

// FirstDelimiter returns the head of the current delimiter list, or nil if
// the list is empty.
func (p *parseContext) FirstDelimiter() *Delimiter {
	return p.delimiters
}
  239. func (p *parseContext) PushDelimiter(d *Delimiter) {
  240. if p.delimiters == nil {
  241. p.delimiters = d
  242. p.lastDelimiter = d
  243. } else {
  244. l := p.lastDelimiter
  245. p.lastDelimiter = d
  246. l.NextDelimiter = d
  247. d.PreviousDelimiter = l
  248. }
  249. }
// RemoveDelimiter unlinks d from the current delimiter list and then either
// turns it into text or removes it from the AST.
func (p *parseContext) RemoveDelimiter(d *Delimiter) {
	// Detach d from its neighbours, moving the head when d is the first
	// element of the list.
	if d.PreviousDelimiter == nil {
		p.delimiters = d.NextDelimiter
	} else {
		d.PreviousDelimiter.NextDelimiter = d.NextDelimiter
		if d.NextDelimiter != nil {
			d.NextDelimiter.PreviousDelimiter = d.PreviousDelimiter
		}
	}
	// Move the tail when d is the last element of the list.
	if d.NextDelimiter == nil {
		p.lastDelimiter = d.PreviousDelimiter
	}
	// Make sure the new head and tail no longer point outside the list.
	if p.delimiters != nil {
		p.delimiters.PreviousDelimiter = nil
	}
	if p.lastDelimiter != nil {
		p.lastDelimiter.NextDelimiter = nil
	}
	d.NextDelimiter = nil
	d.PreviousDelimiter = nil
	// A delimiter with a non-zero Length is handed to
	// ast.MergeOrReplaceTextSegment together with its segment; one with a
	// zero Length is simply removed from its parent.
	if d.Length != 0 {
		ast.MergeOrReplaceTextSegment(d.Parent(), d, d.Segment)
	} else {
		d.Parent().RemoveChild(d.Parent(), d)
	}
}
  276. func (p *parseContext) ClearDelimiters(bottom ast.Node) {
  277. if p.lastDelimiter == nil {
  278. return
  279. }
  280. var c ast.Node
  281. for c = p.lastDelimiter; c != nil && c != bottom; {
  282. prev := c.PreviousSibling()
  283. if d, ok := c.(*Delimiter); ok {
  284. p.RemoveDelimiter(d)
  285. }
  286. c = prev
  287. }
  288. }
  289. func (p *parseContext) AddReference(ref Reference) {
  290. key := util.ToLinkReference(ref.Label())
  291. if _, ok := p.refs[key]; !ok {
  292. p.refs[key] = ref
  293. }
  294. }
  295. func (p *parseContext) Reference(label string) (Reference, bool) {
  296. v, ok := p.refs[label]
  297. return v, ok
  298. }
  299. func (p *parseContext) References() []Reference {
  300. ret := make([]Reference, 0, len(p.refs))
  301. for _, v := range p.refs {
  302. ret = append(ret, v)
  303. }
  304. return ret
  305. }
  306. func (p *parseContext) String() string {
  307. refs := []string{}
  308. for _, r := range p.refs {
  309. refs = append(refs, r.String())
  310. }
  311. return fmt.Sprintf("Context{Store:%#v, Refs:%s}", p.store, strings.Join(refs, ","))
  312. }
// OpenedBlocks returns the list of blocks that are currently being parsed.
// The returned slice is the context's own slice, not a copy.
func (p *parseContext) OpenedBlocks() []Block {
	return p.openedBlocks
}

// SetOpenedBlocks replaces the list of blocks that are currently being
// parsed with v.
func (p *parseContext) SetOpenedBlocks(v []Block) {
	p.openedBlocks = v
}
  319. func (p *parseContext) LastOpenedBlock() Block {
  320. if l := len(p.openedBlocks); l != 0 {
  321. return p.openedBlocks[l-1]
  322. }
  323. return Block{}
  324. }
  325. func (p *parseContext) IsInLinkLabel() bool {
  326. tlist := p.Get(linkLabelStateKey)
  327. return tlist != nil
  328. }
// State represents parser's state.
// State is designed to be used as a bit flag.
type State int

// The State flags. They are generated with 1 << iota starting at None, so
// every constant is a distinct bit and None has the value 1, not 0.
const (
	// None is a default value of the [State].
	None State = 1 << iota
	// Continue indicates parser can continue parsing.
	Continue
	// Close indicates parser cannot parse anymore.
	Close
	// HasChildren indicates parser may have child blocks.
	HasChildren
	// NoChildren indicates parser does not have child blocks.
	NoChildren
	// RequireParagraph indicates parser requires that the last node
	// must be a paragraph and is not converted to other nodes by
	// ParagraphTransformers.
	RequireParagraph
)
// A Config struct is a data structure that holds configuration of the Parser.
type Config struct {
	// Options holds arbitrary named options; they are offered to every
	// registered component that implements SetOptioner.
	Options map[OptionName]interface{}
	// BlockParsers holds the prioritized BlockParser values.
	BlockParsers util.PrioritizedSlice /*<BlockParser>*/
	// InlineParsers holds the prioritized InlineParser values.
	InlineParsers util.PrioritizedSlice /*<InlineParser>*/
	// ParagraphTransformers holds the prioritized ParagraphTransformer values.
	ParagraphTransformers util.PrioritizedSlice /*<ParagraphTransformer>*/
	// ASTTransformers holds the prioritized ASTTransformer values.
	ASTTransformers util.PrioritizedSlice /*<ASTTransformer>*/
	// EscapedSpace is set by WithEscapedSpace.
	EscapedSpace bool
}
  357. // NewConfig returns a new Config.
  358. func NewConfig() *Config {
  359. return &Config{
  360. Options: map[OptionName]interface{}{},
  361. BlockParsers: util.PrioritizedSlice{},
  362. InlineParsers: util.PrioritizedSlice{},
  363. ParagraphTransformers: util.PrioritizedSlice{},
  364. ASTTransformers: util.PrioritizedSlice{},
  365. }
  366. }
// An Option interface is a functional option type for the Parser.
type Option interface {
	// SetParserOption applies this option to the given Config.
	SetParserOption(*Config)
}

// OptionName is a name of parser options.
type OptionName string
  373. // Attribute is an option name that spacify attributes of elements.
  374. const optAttribute OptionName = "Attribute"
  375. type withAttribute struct {
  376. }
  377. func (o *withAttribute) SetParserOption(c *Config) {
  378. c.Options[optAttribute] = true
  379. }
  380. // WithAttribute is a functional option that enables custom attributes.
  381. func WithAttribute() Option {
  382. return &withAttribute{}
  383. }
// A Parser interface parses Markdown text into AST nodes.
type Parser interface {
	// Parse parses the given Markdown text into AST nodes.
	Parse(reader text.Reader, opts ...ParseOption) ast.Node

	// AddOptions adds the given options to this parser.
	AddOptions(...Option)
}
// A SetOptioner interface sets the given option to the object.
type SetOptioner interface {
	// SetOption sets the given option to the object.
	// Options that the object does not accept may be passed, so
	// implementations must ignore them.
	SetOption(name OptionName, value interface{})
}
// A BlockParser interface parses a block level element like Paragraph, List,
// Blockquote etc.
type BlockParser interface {
	// Trigger returns a list of characters that trigger the Open method of
	// this parser.
	// If Trigger returns nil, Open will be called with any line.
	Trigger() []byte

	// Open parses the current line and returns a result of parsing.
	//
	// Open must not parse beyond the current line.
	// If Open has been able to parse the current line, Open must advance the
	// reader position by the consumed byte length.
	//
	// If Open has not been able to parse the current line, Open should return
	// (nil, NoChildren). If Open has been able to parse the current line, Open
	// should return a new Block node together with HasChildren or NoChildren.
	Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State)

	// Continue parses the current line and returns a result of parsing.
	//
	// Continue must not parse beyond the current line.
	// If Continue has been able to parse the current line, Continue must
	// advance the reader position by the consumed byte length.
	//
	// If Continue has not been able to parse the current line, Continue should
	// return Close. If Continue has been able to parse the current line,
	// Continue should return (Continue | NoChildren) or
	// (Continue | HasChildren).
	Continue(node ast.Node, reader text.Reader, pc Context) State

	// Close will be called when the parser returns Close.
	Close(node ast.Node, reader text.Reader, pc Context)

	// CanInterruptParagraph returns true if the parser can interrupt paragraphs,
	// otherwise false.
	CanInterruptParagraph() bool

	// CanAcceptIndentedLine returns true if the parser can open a new node
	// when the given line is indented by more than 3 spaces.
	CanAcceptIndentedLine() bool
}
// An InlineParser interface parses an inline level element like CodeSpan, Link etc.
type InlineParser interface {
	// Trigger returns a list of characters that trigger the Parse method of
	// this parser.
	// Trigger characters must be a punctuation or a halfspace.
	// A halfspace triggers this parser when the character is any space
	// character or the head of a line.
	Trigger() []byte

	// Parse parses the given block into an inline node.
	//
	// Parse can parse beyond the current line.
	// If Parse has been able to parse the current line, it must advance the
	// reader position by the consumed byte length.
	Parse(parent ast.Node, block text.Reader, pc Context) ast.Node
}
// A CloseBlocker interface is a callback that will be called when a block is
// closed during inline parsing.
type CloseBlocker interface {
	// CloseBlock will be called when a block is closed.
	CloseBlock(parent ast.Node, block text.Reader, pc Context)
}
// A ParagraphTransformer transforms parsed Paragraph nodes.
// For example, link references are searched for in parsed Paragraphs.
type ParagraphTransformer interface {
	// Transform transforms the given paragraph.
	Transform(node *ast.Paragraph, reader text.Reader, pc Context)
}
// ASTTransformer transforms the entire Markdown document AST.
type ASTTransformer interface {
	// Transform transforms the given AST tree.
	Transform(node *ast.Document, reader text.Reader, pc Context)
}
  467. // DefaultBlockParsers returns a new list of default BlockParsers.
  468. // Priorities of default BlockParsers are:
  469. //
  470. // SetextHeadingParser, 100
  471. // ThematicBreakParser, 200
  472. // ListParser, 300
  473. // ListItemParser, 400
  474. // CodeBlockParser, 500
  475. // ATXHeadingParser, 600
  476. // FencedCodeBlockParser, 700
  477. // BlockquoteParser, 800
  478. // HTMLBlockParser, 900
  479. // ParagraphParser, 1000
  480. func DefaultBlockParsers() []util.PrioritizedValue {
  481. return []util.PrioritizedValue{
  482. util.Prioritized(NewSetextHeadingParser(), 100),
  483. util.Prioritized(NewThematicBreakParser(), 200),
  484. util.Prioritized(NewListParser(), 300),
  485. util.Prioritized(NewListItemParser(), 400),
  486. util.Prioritized(NewCodeBlockParser(), 500),
  487. util.Prioritized(NewATXHeadingParser(), 600),
  488. util.Prioritized(NewFencedCodeBlockParser(), 700),
  489. util.Prioritized(NewBlockquoteParser(), 800),
  490. util.Prioritized(NewHTMLBlockParser(), 900),
  491. util.Prioritized(NewParagraphParser(), 1000),
  492. }
  493. }
  494. // DefaultInlineParsers returns a new list of default InlineParsers.
  495. // Priorities of default InlineParsers are:
  496. //
  497. // CodeSpanParser, 100
  498. // LinkParser, 200
  499. // AutoLinkParser, 300
  500. // RawHTMLParser, 400
  501. // EmphasisParser, 500
  502. func DefaultInlineParsers() []util.PrioritizedValue {
  503. return []util.PrioritizedValue{
  504. util.Prioritized(NewCodeSpanParser(), 100),
  505. util.Prioritized(NewLinkParser(), 200),
  506. util.Prioritized(NewAutoLinkParser(), 300),
  507. util.Prioritized(NewRawHTMLParser(), 400),
  508. util.Prioritized(NewEmphasisParser(), 500),
  509. }
  510. }
  511. // DefaultParagraphTransformers returns a new list of default ParagraphTransformers.
  512. // Priorities of default ParagraphTransformers are:
  513. //
  514. // LinkReferenceParagraphTransformer, 100
  515. func DefaultParagraphTransformers() []util.PrioritizedValue {
  516. return []util.PrioritizedValue{
  517. util.Prioritized(LinkReferenceParagraphTransformer, 100),
  518. }
  519. }
// A Block struct holds a node and its corresponding parser as a pair.
type Block struct {
	// Node is a BlockNode.
	Node ast.Node
	// Parser is the BlockParser paired with Node.
	Parser BlockParser
}
// parser is the Parser implementation returned by NewParser. Apart from
// options and config, its fields are filled in by the one-time setup that
// runs on the first call to Parse.
type parser struct {
	options map[OptionName]interface{}
	// blockParsers holds the BlockParsers registered for each trigger byte.
	blockParsers [256][]BlockParser
	// freeBlockParsers holds the BlockParsers whose Trigger returns nil.
	freeBlockParsers []BlockParser
	// inlineParsers holds the InlineParsers registered for each trigger byte.
	inlineParsers [256][]InlineParser
	// closeBlockers holds the InlineParsers that also implement CloseBlocker.
	closeBlockers         []CloseBlocker
	paragraphTransformers []ParagraphTransformer
	astTransformers       []ASTTransformer
	escapedSpace          bool
	// config collects options until the first Parse, which consumes it and
	// sets it to nil.
	config *Config
	// initSync guards the one-time setup in Parse.
	initSync sync.Once
}
  539. type withBlockParsers struct {
  540. value []util.PrioritizedValue
  541. }
  542. func (o *withBlockParsers) SetParserOption(c *Config) {
  543. c.BlockParsers = append(c.BlockParsers, o.value...)
  544. }
  545. // WithBlockParsers is a functional option that allow you to add
  546. // BlockParsers to the parser.
  547. func WithBlockParsers(bs ...util.PrioritizedValue) Option {
  548. return &withBlockParsers{bs}
  549. }
  550. type withInlineParsers struct {
  551. value []util.PrioritizedValue
  552. }
  553. func (o *withInlineParsers) SetParserOption(c *Config) {
  554. c.InlineParsers = append(c.InlineParsers, o.value...)
  555. }
  556. // WithInlineParsers is a functional option that allow you to add
  557. // InlineParsers to the parser.
  558. func WithInlineParsers(bs ...util.PrioritizedValue) Option {
  559. return &withInlineParsers{bs}
  560. }
  561. type withParagraphTransformers struct {
  562. value []util.PrioritizedValue
  563. }
  564. func (o *withParagraphTransformers) SetParserOption(c *Config) {
  565. c.ParagraphTransformers = append(c.ParagraphTransformers, o.value...)
  566. }
  567. // WithParagraphTransformers is a functional option that allow you to add
  568. // ParagraphTransformers to the parser.
  569. func WithParagraphTransformers(ps ...util.PrioritizedValue) Option {
  570. return &withParagraphTransformers{ps}
  571. }
  572. type withASTTransformers struct {
  573. value []util.PrioritizedValue
  574. }
  575. func (o *withASTTransformers) SetParserOption(c *Config) {
  576. c.ASTTransformers = append(c.ASTTransformers, o.value...)
  577. }
  578. // WithASTTransformers is a functional option that allow you to add
  579. // ASTTransformers to the parser.
  580. func WithASTTransformers(ps ...util.PrioritizedValue) Option {
  581. return &withASTTransformers{ps}
  582. }
  583. type withEscapedSpace struct {
  584. }
  585. func (o *withEscapedSpace) SetParserOption(c *Config) {
  586. c.EscapedSpace = true
  587. }
  588. // WithEscapedSpace is a functional option indicates that a '\' escaped half-space(0x20) should not trigger parsers.
  589. func WithEscapedSpace() Option {
  590. return &withEscapedSpace{}
  591. }
  592. type withOption struct {
  593. name OptionName
  594. value interface{}
  595. }
  596. func (o *withOption) SetParserOption(c *Config) {
  597. c.Options[o.name] = o.value
  598. }
  599. // WithOption is a functional option that allow you to set
  600. // an arbitrary option to the parser.
  601. func WithOption(name OptionName, value interface{}) Option {
  602. return &withOption{name, value}
  603. }
  604. // NewParser returns a new Parser with given options.
  605. func NewParser(options ...Option) Parser {
  606. config := NewConfig()
  607. for _, opt := range options {
  608. opt.SetParserOption(config)
  609. }
  610. p := &parser{
  611. options: map[OptionName]interface{}{},
  612. config: config,
  613. }
  614. return p
  615. }
  616. func (p *parser) AddOptions(opts ...Option) {
  617. for _, opt := range opts {
  618. opt.SetParserOption(p.config)
  619. }
  620. }
  621. func (p *parser) addBlockParser(v util.PrioritizedValue, options map[OptionName]interface{}) {
  622. bp, ok := v.Value.(BlockParser)
  623. if !ok {
  624. panic(fmt.Sprintf("%v is not a BlockParser", v.Value))
  625. }
  626. tcs := bp.Trigger()
  627. so, ok := v.Value.(SetOptioner)
  628. if ok {
  629. for oname, ovalue := range options {
  630. so.SetOption(oname, ovalue)
  631. }
  632. }
  633. if tcs == nil {
  634. p.freeBlockParsers = append(p.freeBlockParsers, bp)
  635. } else {
  636. for _, tc := range tcs {
  637. if p.blockParsers[tc] == nil {
  638. p.blockParsers[tc] = []BlockParser{}
  639. }
  640. p.blockParsers[tc] = append(p.blockParsers[tc], bp)
  641. }
  642. }
  643. }
  644. func (p *parser) addInlineParser(v util.PrioritizedValue, options map[OptionName]interface{}) {
  645. ip, ok := v.Value.(InlineParser)
  646. if !ok {
  647. panic(fmt.Sprintf("%v is not a InlineParser", v.Value))
  648. }
  649. tcs := ip.Trigger()
  650. so, ok := v.Value.(SetOptioner)
  651. if ok {
  652. for oname, ovalue := range options {
  653. so.SetOption(oname, ovalue)
  654. }
  655. }
  656. if cb, ok := ip.(CloseBlocker); ok {
  657. p.closeBlockers = append(p.closeBlockers, cb)
  658. }
  659. for _, tc := range tcs {
  660. if p.inlineParsers[tc] == nil {
  661. p.inlineParsers[tc] = []InlineParser{}
  662. }
  663. p.inlineParsers[tc] = append(p.inlineParsers[tc], ip)
  664. }
  665. }
  666. func (p *parser) addParagraphTransformer(v util.PrioritizedValue, options map[OptionName]interface{}) {
  667. pt, ok := v.Value.(ParagraphTransformer)
  668. if !ok {
  669. panic(fmt.Sprintf("%v is not a ParagraphTransformer", v.Value))
  670. }
  671. so, ok := v.Value.(SetOptioner)
  672. if ok {
  673. for oname, ovalue := range options {
  674. so.SetOption(oname, ovalue)
  675. }
  676. }
  677. p.paragraphTransformers = append(p.paragraphTransformers, pt)
  678. }
  679. func (p *parser) addASTTransformer(v util.PrioritizedValue, options map[OptionName]interface{}) {
  680. at, ok := v.Value.(ASTTransformer)
  681. if !ok {
  682. panic(fmt.Sprintf("%v is not a ASTTransformer", v.Value))
  683. }
  684. so, ok := v.Value.(SetOptioner)
  685. if ok {
  686. for oname, ovalue := range options {
  687. so.SetOption(oname, ovalue)
  688. }
  689. }
  690. p.astTransformers = append(p.astTransformers, at)
  691. }
// A ParseConfig struct is a data structure that holds configuration of the Parser.Parse.
type ParseConfig struct {
	// Context is the Context used for the parse. When it is left nil, Parse
	// creates a new one with NewContext.
	Context Context
}

// A ParseOption is a functional option type for the Parser.Parse.
type ParseOption func(c *ParseConfig)
  698. // WithContext is a functional option that allow you to override
  699. // a default context.
  700. func WithContext(context Context) ParseOption {
  701. return func(c *ParseConfig) {
  702. c.Context = context
  703. }
  704. }
// Parse parses the Markdown text supplied by reader into an AST and returns
// its document root.
//
// The first call performs a one-time setup that sorts the configured parsers
// and transformers by priority, registers them on the parser and then drops
// the configuration. Every call then parses the block structure, parses the
// inline content of each block and finally runs the AST transformers.
func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node {
	p.initSync.Do(func() {
		p.config.BlockParsers.Sort()
		for _, v := range p.config.BlockParsers {
			p.addBlockParser(v, p.config.Options)
		}
		// Parsers without trigger characters are appended to every
		// per-byte list that has at least one parser.
		for i := range p.blockParsers {
			if p.blockParsers[i] != nil {
				p.blockParsers[i] = append(p.blockParsers[i], p.freeBlockParsers...)
			}
		}

		p.config.InlineParsers.Sort()
		for _, v := range p.config.InlineParsers {
			p.addInlineParser(v, p.config.Options)
		}
		p.config.ParagraphTransformers.Sort()
		for _, v := range p.config.ParagraphTransformers {
			p.addParagraphTransformer(v, p.config.Options)
		}
		p.config.ASTTransformers.Sort()
		for _, v := range p.config.ASTTransformers {
			p.addASTTransformer(v, p.config.Options)
		}
		p.escapedSpace = p.config.EscapedSpace
		// The configuration has been consumed; from here on AddOptions has
		// no Config to apply options to.
		p.config = nil
	})
	c := &ParseConfig{}
	for _, opt := range opts {
		opt(c)
	}
	// Fall back to a fresh context when the caller did not supply one.
	if c.Context == nil {
		c.Context = NewContext()
	}
	pc := c.Context
	root := ast.NewDocument()
	p.parseBlocks(root, reader, pc)

	// Inline parsing runs over the same source through a separate block
	// reader.
	blockReader := text.NewBlockReader(reader.Source(), nil)
	p.walkBlock(root, func(node ast.Node) {
		p.parseBlock(blockReader, node, pc)
	})
	for _, at := range p.astTransformers {
		at.Transform(root, reader, pc)
	}
	// root.Dump(reader.Source(), 0)
	return root
}
  751. func (p *parser) transformParagraph(node *ast.Paragraph, reader text.Reader, pc Context) bool {
  752. for _, pt := range p.paragraphTransformers {
  753. pt.Transform(node, reader, pc)
  754. if node.Parent() == nil {
  755. return true
  756. }
  757. }
  758. return false
  759. }
  760. func (p *parser) closeBlocks(from, to int, reader text.Reader, pc Context) {
  761. blocks := pc.OpenedBlocks()
  762. for i := from; i >= to; i-- {
  763. node := blocks[i].Node
  764. paragraph, ok := node.(*ast.Paragraph)
  765. if ok && node.Parent() != nil {
  766. p.transformParagraph(paragraph, reader, pc)
  767. }
  768. if node.Parent() != nil { // closes only if node has not been transformed
  769. blocks[i].Parser.Close(blocks[i].Node, reader, pc)
  770. }
  771. }
  772. if from == len(blocks)-1 {
  773. blocks = blocks[0:to]
  774. } else {
  775. blocks = append(blocks[0:to], blocks[from+1:]...)
  776. }
  777. pc.SetOpenedBlocks(blocks)
  778. }
  779. type blockOpenResult int
  780. const (
  781. paragraphContinuation blockOpenResult = iota + 1
  782. newBlocksOpened
  783. noBlocksOpened
  784. )
// openBlocks tries to open new blocks under parent, starting at the line the
// reader is currently positioned on.
//
// It returns newBlocksOpened when at least one block parser opened a node,
// paragraphContinuation when nothing was opened but the last opened block is
// a paragraph whose parser accepted the line, and noBlocksOpened otherwise.
// blankLine is recorded on every node that gets opened through
// SetBlankPreviousLines.
func (p *parser) openBlocks(parent ast.Node, blankLine bool, reader text.Reader, pc Context) blockOpenResult {
	result := blockOpenResult(noBlocksOpened)
	// continuable is true while the last opened block is a paragraph that the
	// current line may still continue.
	continuable := false
	lastBlock := pc.LastOpenedBlock()
	if lastBlock.Node != nil {
		continuable = ast.IsParagraph(lastBlock.Node)
	}
retry:
	var bps []BlockParser
	line, _ := reader.PeekLine()
	w, pos := util.IndentWidth(line, reader.LineOffset())
	// Publish the indentation of this line to the context; -1 is stored when
	// the indent width covers the whole line.
	if w >= len(line) {
		pc.SetBlockOffset(-1)
		pc.SetBlockIndent(-1)
	} else {
		pc.SetBlockOffset(pos)
		pc.SetBlockIndent(w)
	}
	// No line, or a line consisting of a bare newline: nothing can be opened.
	if line == nil || line[0] == '\n' {
		goto continuable
	}
	// Prefer the parsers registered for the byte at pos; fall back to the
	// free parsers when none are registered for it.
	bps = p.freeBlockParsers
	if pos < len(line) {
		bps = p.blockParsers[line[pos]]
		if bps == nil {
			bps = p.freeBlockParsers
		}
	}
	if bps == nil {
		goto continuable
	}
	for _, bp := range bps {
		// While a paragraph is open and nothing has been opened on this line
		// yet, only parsers that may interrupt a paragraph are tried.
		if continuable && result == noBlocksOpened && !bp.CanInterruptParagraph() {
			continue
		}
		// Lines indented by more than 3 columns are only offered to parsers
		// that accept indented lines.
		if w > 3 && !bp.CanAcceptIndentedLine() {
			continue
		}
		lastBlock = pc.LastOpenedBlock()
		last := lastBlock.Node
		node, state := bp.Open(parent, reader, pc)
		if node != nil {
			// The parser requires the last node to be a paragraph.
			// With the table extension:
			//
			//     0
			//     -:
			//     -
			//
			// the '-' on the 3rd line looks like a Setext heading, because the
			// 1st and 2nd lines still form a paragraph when the Setext heading
			// parser tries to parse the 3rd line.
			// But the 1st and 2nd lines are a table, so this paragraph will be
			// transformed by a paragraph transformer. This text should
			// therefore be converted to a table and an empty list.
			if state&RequireParagraph != 0 {
				if last == parent.LastChild() {
					// Opened paragraph may be transformed by ParagraphTransformers in
					// closeBlocks().
					lastBlock.Parser.Close(last, reader, pc)
					blocks := pc.OpenedBlocks()
					pc.SetOpenedBlocks(blocks[0 : len(blocks)-1])
					if p.transformParagraph(last.(*ast.Paragraph), reader, pc) {
						// Paragraph has been transformed.
						// So this parser is considered as failing.
						continuable = false
						goto retry
					}
				}
			}
			node.SetBlankPreviousLines(blankLine)
			// If the previously opened node has been detached from the tree,
			// close the innermost opened block before appending the new node.
			if last != nil && last.Parent() == nil {
				lastPos := len(pc.OpenedBlocks()) - 1
				p.closeBlocks(lastPos, lastPos, reader, pc)
			}
			parent.AppendChild(parent, node)
			result = newBlocksOpened
			be := Block{node, bp}
			pc.SetOpenedBlocks(append(pc.OpenedBlocks(), be))
			if state&HasChildren != 0 {
				parent = node
				goto retry // try child block
			}
			break // no children, can not open more blocks on this line
		}
	}

continuable:

	// Nothing was opened: let the last opened paragraph's parser decide
	// whether this line continues it.
	if result == noBlocksOpened && continuable {
		state := lastBlock.Parser.Continue(lastBlock.Node, reader, pc)
		if state&Continue != 0 {
			result = paragraphContinuation
		}
	}
	return result
}
  880. type lineStat struct {
  881. lineNum int
  882. level int
  883. isBlank bool
  884. }
  885. func isBlankLine(lineNum, level int, stats []lineStat) bool {
  886. ret := true
  887. for i := len(stats) - 1 - level; i >= 0; i-- {
  888. ret = false
  889. s := stats[i]
  890. if s.lineNum == lineNum {
  891. if s.level < level && s.isBlank {
  892. return true
  893. } else if s.level == level {
  894. return s.isBlank
  895. }
  896. }
  897. if s.lineNum < lineNum {
  898. return ret
  899. }
  900. }
  901. return ret
  902. }
// parseBlocks reads the source line by line from reader and builds the block
// structure under parent, keeping the list of currently opened blocks in pc.
//
// Each iteration of the outer loop skips blank lines and asks openBlocks to
// start new blocks on the line that follows. The inner loop then offers every
// subsequent line to the opened blocks, outermost first, until no block
// remains open. The function returns when SkipBlankLines reports !ok, when
// openBlocks does not open a block at the start of a run, or when the reader
// has no more lines (in which case all opened blocks are closed first).
func (p *parser) parseBlocks(parent ast.Node, reader text.Reader, pc Context) {
	pc.SetOpenedBlocks([]Block{})
	// blankLines records, per line and per nesting level, whether the line
	// was blank; isBlankLine consults it to compute the blank-line flag that
	// is handed to openBlocks.
	blankLines := make([]lineStat, 0, 128)
	var isBlank bool
	for { // process blocks separated by blank lines
		_, lines, ok := reader.SkipBlankLines()
		if !ok {
			return
		}
		lineNum, _ := reader.Position()
		if lines != 0 {
			// Blank lines were skipped: restart the records (reusing the
			// backing array) and mark the previous line as blank for every
			// level that is still open.
			blankLines = blankLines[0:0]
			l := len(pc.OpenedBlocks())
			for i := 0; i < l; i++ {
				blankLines = append(blankLines, lineStat{lineNum - 1, i, lines != 0})
			}
		}
		isBlank = isBlankLine(lineNum-1, 0, blankLines)
		// first, we try to open blocks
		if p.openBlocks(parent, isBlank, reader, pc) != newBlocksOpened {
			return
		}
		reader.AdvanceLine()
		for { // process opened blocks line by line
			openedBlocks := pc.OpenedBlocks()
			l := len(openedBlocks)
			if l == 0 {
				// Everything is closed; go back to skipping blank lines.
				break
			}
			lastIndex := l - 1
			for i := 0; i < l; i++ {
				be := openedBlocks[i]
				line, _ := reader.PeekLine()
				if line == nil {
					// No more lines: close every opened block and stop.
					p.closeBlocks(lastIndex, 0, reader, pc)
					reader.AdvanceLine()
					return
				}
				lineNum, _ := reader.Position()
				blankLines = append(blankLines, lineStat{lineNum, i, util.IsBlank(line)})
				// If node is a paragraph, p.openBlocks determines whether it is continuable.
				// So we do not process paragraphs here.
				if !ast.IsParagraph(be.Node) {
					state := be.Parser.Continue(be.Node, reader, pc)
					if state&Continue != 0 {
						// When the current node is the innermost opened block and
						// its parser reports HasChildren, we try to open new
						// child nodes under it.
						if state&HasChildren != 0 && i == lastIndex {
							isBlank = isBlankLine(lineNum-1, i, blankLines)
							p.openBlocks(be.Node, isBlank, reader, pc)
							break
						}
						// This block continues; move on to the next nested one.
						continue
					}
				}
				// current node may be closed or lazy continuation
				isBlank = isBlankLine(lineNum-1, i, blankLines)
				// New blocks are opened under the block enclosing level i, or
				// under the top-level parent for the outermost level.
				thisParent := parent
				if i != 0 {
					thisParent = openedBlocks[i-1].Node
				}
				lastNode := openedBlocks[lastIndex].Node
				result := p.openBlocks(thisParent, isBlank, reader, pc)
				if result != paragraphContinuation {
					// lastNode is a paragraph and was transformed by the paragraph
					// transformers.
					if openedBlocks[lastIndex].Node != lastNode {
						lastIndex--
					}
					p.closeBlocks(lastIndex, i, reader, pc)
				}
				break
			}
			reader.AdvanceLine()
		}
	}
}
  980. func (p *parser) walkBlock(block ast.Node, cb func(node ast.Node)) {
  981. for c := block.FirstChild(); c != nil; c = c.NextSibling() {
  982. p.walkBlock(c, cb)
  983. }
  984. cb(block)
  985. }
  986. const (
  987. lineBreakHard uint8 = 1 << iota
  988. lineBreakSoft
  989. lineBreakVisible
  990. )
// parseBlock parses the contents of parent's lines and appends the resulting
// nodes to parent.
//
// Each line is scanned byte by byte. At punctuation, at spaces and at the
// first byte of a line, the inline parsers registered for that byte are
// tried; whatever they do not consume is appended as text segments, with
// soft/hard line-break flags derived from how the line ends. Nodes for which
// parent.IsRaw() is true are left untouched. After all lines are processed,
// ProcessDelimiters is run and every registered close-blocker is notified.
func (p *parser) parseBlock(block text.BlockReader, parent ast.Node, pc Context) {
	if parent.IsRaw() {
		return
	}
	// escaped is true when the previously scanned byte was an unescaped
	// backslash.
	escaped := false
	source := block.Source()
	block.Reset(parent.Lines())
	for {
	retry:
		line, _ := block.PeekLine()
		if line == nil {
			break
		}
		lineLength := len(line)
		var lineBreakFlags uint8
		hasNewLine := line[lineLength-1] == '\n'
		// Classify the line ending. For a hard break, lineLength is shortened
		// so that the break marker and the newline are excluded from the scan
		// below.
		if ((lineLength >= 3 && line[lineLength-2] == '\\' &&
			line[lineLength-3] != '\\') || (lineLength == 2 && line[lineLength-2] == '\\')) && hasNewLine { // ends with \\n
			lineLength -= 2
			lineBreakFlags |= lineBreakHard | lineBreakVisible
		} else if ((lineLength >= 4 && line[lineLength-3] == '\\' && line[lineLength-2] == '\r' &&
			line[lineLength-4] != '\\') || (lineLength == 3 && line[lineLength-3] == '\\' && line[lineLength-2] == '\r')) &&
			hasNewLine { // ends with \\r\n
			lineLength -= 3
			lineBreakFlags |= lineBreakHard | lineBreakVisible
		} else if lineLength >= 3 && line[lineLength-3] == ' ' && line[lineLength-2] == ' ' &&
			hasNewLine { // ends with [space][space]\n
			lineLength -= 3
			lineBreakFlags |= lineBreakHard
		} else if lineLength >= 4 && line[lineLength-4] == ' ' && line[lineLength-3] == ' ' &&
			line[lineLength-2] == '\r' && hasNewLine { // ends with [space][space]\r\n
			lineLength -= 4
			lineBreakFlags |= lineBreakHard
		} else if hasNewLine {
			// If the line ends with a newline character, but it is not a hardlineBreak, then it is a softLinebreak
			// If the line ends with a hardlineBreak, then it cannot end with a softLinebreak
			// See https://spec.commonmark.org/0.30/#soft-line-breaks
			lineBreakFlags |= lineBreakSoft
		}

		l, startPosition := block.Position()
		// n counts the bytes scanned since the reader was last advanced.
		n := 0
		for i := 0; i < lineLength; i++ {
			c := line[i]
			if c == '\n' {
				break
			}
			isSpace := util.IsSpace(c) && c != '\r' && c != '\n'
			isPunct := util.IsPunct(c)
			// Inline parsers are consulted at unescaped punctuation, at spaces
			// (unless the space is escaped and p.escapedSpace is set) and at
			// the first byte of the line.
			if (isPunct && !escaped) || isSpace && !(escaped && p.escapedSpace) || i == 0 {
				// Spaces, and a non-punctuation byte at the start of the line,
				// are looked up under the ' ' key.
				parserChar := c
				if isSpace || (i == 0 && !isPunct) {
					parserChar = ' '
				}
				ips := p.inlineParsers[parserChar]
				if ips != nil {
					// Move the reader up to the current byte and remember the
					// position so that it can be restored after a failed attempt.
					block.Advance(n)
					n = 0
					savedLine, savedPosition := block.Position()
					if i != 0 {
						// Emit the plain text scanned so far before handing over
						// to the inline parsers.
						_, currentPosition := block.Position()
						ast.MergeOrAppendTextSegment(parent, startPosition.Between(currentPosition))
						_, startPosition = block.Position()
					}
					var inlineNode ast.Node
					for _, ip := range ips {
						inlineNode = ip.Parse(parent, block, pc)
						if inlineNode != nil {
							break
						}
						block.SetPosition(savedLine, savedPosition)
					}
					if inlineNode != nil {
						// The parser consumed input: append its node and start
						// over from the reader's new position.
						parent.AppendChild(parent, inlineNode)
						goto retry
					}
				}
			}
			if escaped {
				escaped = false
				n++
				continue
			}

			if c == '\\' {
				escaped = true
				n++
				continue
			}

			escaped = false
			n++
		}
		if n != 0 {
			block.Advance(n)
		}
		currentL, currentPosition := block.Position()
		// The reader is no longer on the line this iteration started with, so
		// process the line it is on now instead of emitting a segment.
		if l != currentL {
			continue
		}
		diff := startPosition.Between(currentPosition)
		var text *ast.Text
		// For a backslash-style hard break the segment is used as is; in all
		// other cases it goes through TrimRightSpace first.
		if lineBreakFlags&(lineBreakHard|lineBreakVisible) == lineBreakHard|lineBreakVisible {
			text = ast.NewTextSegment(diff)
		} else {
			text = ast.NewTextSegment(diff.TrimRightSpace(source))
		}
		text.SetSoftLineBreak(lineBreakFlags&lineBreakSoft != 0)
		text.SetHardLineBreak(lineBreakFlags&lineBreakHard != 0)
		parent.AppendChild(parent, text)
		block.AdvanceLine()
	}

	// NOTE(review): presumably resolves the delimiters collected in pc while
	// parsing this block — confirm against ProcessDelimiters.
	ProcessDelimiters(nil, pc)
	for _, ip := range p.closeBlockers {
		ip.CloseBlock(parent, block, pc)
	}
}