parser.go 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253
  1. // Package parser contains stuff that are related to parsing a Markdown text.
  2. package parser
  3. import (
  4. "fmt"
  5. "strings"
  6. "sync"
  7. "github.com/yuin/goldmark/ast"
  8. "github.com/yuin/goldmark/text"
  9. "github.com/yuin/goldmark/util"
  10. )
  11. // A Reference interface represents a link reference in Markdown text.
  12. type Reference interface {
  13. // String implements Stringer.
  14. String() string
  15. // Label returns a label of the reference.
  16. Label() []byte
  17. // Destination returns a destination(URL) of the reference.
  18. Destination() []byte
  19. // Title returns a title of the reference.
  20. Title() []byte
  21. }
  22. type reference struct {
  23. label []byte
  24. destination []byte
  25. title []byte
  26. }
  27. // NewReference returns a new Reference.
  28. func NewReference(label, destination, title []byte) Reference {
  29. return &reference{label, destination, title}
  30. }
  31. func (r *reference) Label() []byte {
  32. return r.label
  33. }
  34. func (r *reference) Destination() []byte {
  35. return r.destination
  36. }
  37. func (r *reference) Title() []byte {
  38. return r.title
  39. }
  40. func (r *reference) String() string {
  41. return fmt.Sprintf("Reference{Label:%s, Destination:%s, Title:%s}", r.label, r.destination, r.title)
  42. }
// An IDs interface is a collection of element ids.
// It is used both to create new ids and to record ids that are already taken.
type IDs interface {
	// Generate generates a new element id from the given value for a node of
	// the given kind.
	Generate(value []byte, kind ast.NodeKind) []byte
	// Put puts the given element id into the table of used ids.
	Put(value []byte)
}
  50. type ids struct {
  51. values map[string]bool
  52. }
  53. func newIDs() IDs {
  54. return &ids{
  55. values: map[string]bool{},
  56. }
  57. }
  58. func (s *ids) Generate(value []byte, kind ast.NodeKind) []byte {
  59. value = util.TrimLeftSpace(value)
  60. value = util.TrimRightSpace(value)
  61. result := []byte{}
  62. for i := 0; i < len(value); {
  63. v := value[i]
  64. l := util.UTF8Len(v)
  65. i += int(l)
  66. if l != 1 {
  67. continue
  68. }
  69. if util.IsAlphaNumeric(v) {
  70. if 'A' <= v && v <= 'Z' {
  71. v += 'a' - 'A'
  72. }
  73. result = append(result, v)
  74. } else if util.IsSpace(v) || v == '-' || v == '_' {
  75. result = append(result, '-')
  76. }
  77. }
  78. if len(result) == 0 {
  79. if kind == ast.KindHeading {
  80. result = []byte("heading")
  81. } else {
  82. result = []byte("id")
  83. }
  84. }
  85. if _, ok := s.values[util.BytesToReadOnlyString(result)]; !ok {
  86. s.values[util.BytesToReadOnlyString(result)] = true
  87. return result
  88. }
  89. for i := 1; ; i++ {
  90. newResult := fmt.Sprintf("%s-%d", result, i)
  91. if _, ok := s.values[newResult]; !ok {
  92. s.values[newResult] = true
  93. return []byte(newResult)
  94. }
  95. }
  96. }
  97. func (s *ids) Put(value []byte) {
  98. s.values[util.BytesToReadOnlyString(value)] = true
  99. }
  100. // ContextKey is a key that is used to set arbitrary values to the context.
  101. type ContextKey int
  102. // ContextKeyMax is a maximum value of the ContextKey.
  103. var ContextKeyMax ContextKey
  104. // NewContextKey return a new ContextKey value.
  105. func NewContextKey() ContextKey {
  106. ContextKeyMax++
  107. return ContextKeyMax
  108. }
// A Context interface holds the information that is necessary to parse
// Markdown text.
type Context interface {
	// String implements Stringer.
	String() string
	// Get returns a value associated with the given key.
	Get(ContextKey) interface{}
	// ComputeIfAbsent computes a value if no value is associated with the
	// given key, stores it, and returns the value.
	ComputeIfAbsent(ContextKey, func() interface{}) interface{}
	// Set sets the given value to the context.
	Set(ContextKey, interface{})
	// AddReference adds the given reference to this context.
	AddReference(Reference)
	// Reference returns (a reference, true) if a reference associated with
	// the given label exists, otherwise (nil, false).
	Reference(label string) (Reference, bool)
	// References returns a list of references.
	References() []Reference
	// IDs returns a collection of the element ids.
	IDs() IDs
	// BlockOffset returns the position of the first non-space character on
	// the current line.
	// This value is valid only for BlockParser.Open.
	// BlockOffset returns -1 if the current line is blank.
	BlockOffset() int
	// SetBlockOffset sets the position of the first non-space character on
	// the current line.
	// This value is valid only for BlockParser.Open.
	SetBlockOffset(int)
	// BlockIndent returns the indent width of the current line.
	// This value is valid only for BlockParser.Open.
	// BlockIndent returns -1 if the current line is blank.
	BlockIndent() int
	// SetBlockIndent sets the indent width of the current line.
	// This value is valid only for BlockParser.Open.
	SetBlockIndent(int)
	// FirstDelimiter returns the first delimiter of the current delimiter list.
	FirstDelimiter() *Delimiter
	// LastDelimiter returns the last delimiter of the current delimiter list.
	LastDelimiter() *Delimiter
	// PushDelimiter appends the given delimiter to the tail of the current
	// delimiter list.
	PushDelimiter(delimiter *Delimiter)
	// RemoveDelimiter removes the given delimiter from the current delimiter list.
	RemoveDelimiter(d *Delimiter)
	// ClearDelimiters clears the current delimiter list.
	ClearDelimiters(bottom ast.Node)
	// OpenedBlocks returns a list of nodes that are currently being parsed.
	OpenedBlocks() []Block
	// SetOpenedBlocks sets a list of nodes that are currently being parsed.
	SetOpenedBlocks([]Block)
	// LastOpenedBlock returns the last node that is currently being parsed.
	LastOpenedBlock() Block
	// IsInLinkLabel returns true if the current position seems to be in a
	// link label.
	IsInLinkLabel() bool
}
  163. // A ContextConfig struct is a data structure that holds configuration of the Context.
  164. type ContextConfig struct {
  165. IDs IDs
  166. }
  167. // An ContextOption is a functional option type for the Context.
  168. type ContextOption func(*ContextConfig)
  169. // WithIDs is a functional option for the Context.
  170. func WithIDs(ids IDs) ContextOption {
  171. return func(c *ContextConfig) {
  172. c.IDs = ids
  173. }
  174. }
// parseContext is the Context implementation returned by NewContext.
type parseContext struct {
	// store holds the arbitrary values; it is indexed by ContextKey.
	store []interface{}
	// ids is the element id collection returned by IDs.
	ids IDs
	// refs maps util.ToLinkReference(label) to the first reference added
	// under that key.
	refs map[string]Reference
	// blockOffset and blockIndent describe the current line; both start at -1.
	blockOffset int
	blockIndent int
	// delimiters is the head and lastDelimiter the tail of the delimiter list.
	delimiters    *Delimiter
	lastDelimiter *Delimiter
	// openedBlocks lists the blocks that are currently being parsed.
	openedBlocks []Block
}
  185. // NewContext returns a new Context.
  186. func NewContext(options ...ContextOption) Context {
  187. cfg := &ContextConfig{
  188. IDs: newIDs(),
  189. }
  190. for _, option := range options {
  191. option(cfg)
  192. }
  193. return &parseContext{
  194. store: make([]interface{}, ContextKeyMax+1),
  195. refs: map[string]Reference{},
  196. ids: cfg.IDs,
  197. blockOffset: -1,
  198. blockIndent: -1,
  199. delimiters: nil,
  200. lastDelimiter: nil,
  201. openedBlocks: []Block{},
  202. }
  203. }
  204. func (p *parseContext) Get(key ContextKey) interface{} {
  205. return p.store[key]
  206. }
  207. func (p *parseContext) ComputeIfAbsent(key ContextKey, f func() interface{}) interface{} {
  208. v := p.store[key]
  209. if v == nil {
  210. v = f()
  211. p.store[key] = v
  212. }
  213. return v
  214. }
  215. func (p *parseContext) Set(key ContextKey, value interface{}) {
  216. p.store[key] = value
  217. }
// IDs returns the element id collection this context was configured with.
func (p *parseContext) IDs() IDs {
	return p.ids
}

// BlockOffset returns the value last passed to SetBlockOffset, or -1 if it
// has not been set on this context.
func (p *parseContext) BlockOffset() int {
	return p.blockOffset
}

// SetBlockOffset stores v as the block offset of the current line.
func (p *parseContext) SetBlockOffset(v int) {
	p.blockOffset = v
}

// BlockIndent returns the value last passed to SetBlockIndent, or -1 if it
// has not been set on this context.
func (p *parseContext) BlockIndent() int {
	return p.blockIndent
}

// SetBlockIndent stores v as the indent width of the current line.
func (p *parseContext) SetBlockIndent(v int) {
	p.blockIndent = v
}

// LastDelimiter returns the tail of the delimiter list, or nil if the list
// is empty.
func (p *parseContext) LastDelimiter() *Delimiter {
	return p.lastDelimiter
}

// FirstDelimiter returns the head of the delimiter list, or nil if the list
// is empty.
func (p *parseContext) FirstDelimiter() *Delimiter {
	return p.delimiters
}
  239. func (p *parseContext) PushDelimiter(d *Delimiter) {
  240. if p.delimiters == nil {
  241. p.delimiters = d
  242. p.lastDelimiter = d
  243. } else {
  244. l := p.lastDelimiter
  245. p.lastDelimiter = d
  246. l.NextDelimiter = d
  247. d.PreviousDelimiter = l
  248. }
  249. }
// RemoveDelimiter unlinks d from the delimiter list and then disposes of the
// delimiter node itself.
//
// After unlinking, a delimiter whose Length is non-zero is handed to
// ast.MergeOrReplaceTextSegment together with its Segment (presumably so the
// leftover delimiter characters end up as text -- confirm against the ast
// package); a delimiter with Length 0 is removed from its parent node.
func (p *parseContext) RemoveDelimiter(d *Delimiter) {
	if d.PreviousDelimiter == nil {
		// d is the head: the list now starts at its successor.
		p.delimiters = d.NextDelimiter
	} else {
		// d is in the middle or at the tail: bridge its neighbours.
		d.PreviousDelimiter.NextDelimiter = d.NextDelimiter
		if d.NextDelimiter != nil {
			d.NextDelimiter.PreviousDelimiter = d.PreviousDelimiter
		}
	}
	if d.NextDelimiter == nil {
		// d is the tail: the list now ends at its predecessor.
		p.lastDelimiter = d.PreviousDelimiter
	}
	// Make sure the new head and tail do not point outside the list. This
	// also covers the head-removal case above, where the successor's
	// PreviousDelimiter still referred to d.
	if p.delimiters != nil {
		p.delimiters.PreviousDelimiter = nil
	}
	if p.lastDelimiter != nil {
		p.lastDelimiter.NextDelimiter = nil
	}
	// Detach d completely before touching the AST.
	d.NextDelimiter = nil
	d.PreviousDelimiter = nil
	if d.Length != 0 {
		ast.MergeOrReplaceTextSegment(d.Parent(), d, d.Segment)
	} else {
		d.Parent().RemoveChild(d.Parent(), d)
	}
}
  276. func (p *parseContext) ClearDelimiters(bottom ast.Node) {
  277. if p.lastDelimiter == nil {
  278. return
  279. }
  280. var c ast.Node
  281. for c = p.lastDelimiter; c != nil && c != bottom; {
  282. prev := c.PreviousSibling()
  283. if d, ok := c.(*Delimiter); ok {
  284. p.RemoveDelimiter(d)
  285. }
  286. c = prev
  287. }
  288. }
  289. func (p *parseContext) AddReference(ref Reference) {
  290. key := util.ToLinkReference(ref.Label())
  291. if _, ok := p.refs[key]; !ok {
  292. p.refs[key] = ref
  293. }
  294. }
  295. func (p *parseContext) Reference(label string) (Reference, bool) {
  296. v, ok := p.refs[label]
  297. return v, ok
  298. }
  299. func (p *parseContext) References() []Reference {
  300. ret := make([]Reference, 0, len(p.refs))
  301. for _, v := range p.refs {
  302. ret = append(ret, v)
  303. }
  304. return ret
  305. }
  306. func (p *parseContext) String() string {
  307. refs := []string{}
  308. for _, r := range p.refs {
  309. refs = append(refs, r.String())
  310. }
  311. return fmt.Sprintf("Context{Store:%#v, Refs:%s}", p.store, strings.Join(refs, ","))
  312. }
  313. func (p *parseContext) OpenedBlocks() []Block {
  314. return p.openedBlocks
  315. }
  316. func (p *parseContext) SetOpenedBlocks(v []Block) {
  317. p.openedBlocks = v
  318. }
  319. func (p *parseContext) LastOpenedBlock() Block {
  320. if l := len(p.openedBlocks); l != 0 {
  321. return p.openedBlocks[l-1]
  322. }
  323. return Block{}
  324. }
  325. func (p *parseContext) IsInLinkLabel() bool {
  326. tlist := p.Get(linkLabelStateKey)
  327. return tlist != nil
  328. }
// State represents parser's state.
// State is designed to be used as a bit flag: every constant below occupies
// its own bit, so values can be combined with | and tested with &.
type State int

const (
	// none is the first flag of the series (1 << 0); it is not exported.
	none State = 1 << iota
	// Continue indicates parser can continue parsing.
	Continue
	// Close indicates parser cannot parse anymore.
	Close
	// HasChildren indicates parser may have child blocks.
	HasChildren
	// NoChildren indicates parser does not have child blocks.
	NoChildren
	// RequireParagraph indicates parser requires that the last node
	// must be a paragraph and is not converted to other nodes by
	// ParagraphTransformers.
	RequireParagraph
)
// A Config struct is a data structure that holds configuration of the Parser.
type Config struct {
	// Options holds arbitrary named options; they are passed to every
	// registered component that implements SetOptioner.
	Options map[OptionName]interface{}
	// The following lists hold the components of the parser together with
	// their priorities. The element type each list is expected to carry is
	// given in the trailing comment.
	BlockParsers          util.PrioritizedSlice /*<BlockParser>*/
	InlineParsers         util.PrioritizedSlice /*<InlineParser>*/
	ParagraphTransformers util.PrioritizedSlice /*<ParagraphTransformer>*/
	ASTTransformers       util.PrioritizedSlice /*<ASTTransformer>*/
	// EscapedSpace is set by WithEscapedSpace.
	EscapedSpace bool
}
  356. // NewConfig returns a new Config.
  357. func NewConfig() *Config {
  358. return &Config{
  359. Options: map[OptionName]interface{}{},
  360. BlockParsers: util.PrioritizedSlice{},
  361. InlineParsers: util.PrioritizedSlice{},
  362. ParagraphTransformers: util.PrioritizedSlice{},
  363. ASTTransformers: util.PrioritizedSlice{},
  364. }
  365. }
// An Option interface is a functional option type for the Parser.
type Option interface {
	// SetParserOption applies this option to the given parser configuration.
	SetParserOption(*Config)
}

// OptionName is a name of parser options.
type OptionName string
  372. // Attribute is an option name that spacify attributes of elements.
  373. const optAttribute OptionName = "Attribute"
  374. type withAttribute struct {
  375. }
  376. func (o *withAttribute) SetParserOption(c *Config) {
  377. c.Options[optAttribute] = true
  378. }
  379. // WithAttribute is a functional option that enables custom attributes.
  380. func WithAttribute() Option {
  381. return &withAttribute{}
  382. }
// A Parser interface parses Markdown text into AST nodes.
type Parser interface {
	// Parse parses the given Markdown text into AST nodes.
	Parse(reader text.Reader, opts ...ParseOption) ast.Node
	// AddOptions adds the given options to this parser.
	AddOptions(...Option)
}
// A SetOptioner interface sets the given option to the object.
type SetOptioner interface {
	// SetOption sets the given option to the object.
	// Options that the object does not understand may be passed as well,
	// so implementations must ignore options they do not accept.
	SetOption(name OptionName, value interface{})
}
// A BlockParser interface parses a block level element like Paragraph, List,
// Blockquote etc.
type BlockParser interface {
	// Trigger returns a list of characters that trigger the Open method of
	// this parser.
	// If Trigger returns nil, Open will be called for any line.
	Trigger() []byte
	// Open parses the current line and returns a result of parsing.
	//
	// Open must not parse beyond the current line.
	// If Open has been able to parse the current line, Open must advance the
	// reader position by the consumed byte length.
	//
	// If Open has not been able to parse the current line, Open should return
	// (nil, NoChildren). If Open has been able to parse the current line, Open
	// should return a new Block node together with HasChildren or NoChildren.
	Open(parent ast.Node, reader text.Reader, pc Context) (ast.Node, State)
	// Continue parses the current line and returns a result of parsing.
	//
	// Continue must not parse beyond the current line.
	// If Continue has been able to parse the current line, Continue must
	// advance the reader position by the consumed byte length.
	//
	// If Continue has not been able to parse the current line, Continue should
	// return Close. If Continue has been able to parse the current line,
	// Continue should return (Continue | NoChildren) or
	// (Continue | HasChildren).
	Continue(node ast.Node, reader text.Reader, pc Context) State
	// Close will be called when the parser returns Close.
	Close(node ast.Node, reader text.Reader, pc Context)
	// CanInterruptParagraph returns true if the parser can interrupt paragraphs,
	// otherwise false.
	CanInterruptParagraph() bool
	// CanAcceptIndentedLine returns true if the parser can open a new node
	// when the given line is indented by more than 3 spaces.
	CanAcceptIndentedLine() bool
}
// An InlineParser interface parses an inline level element like CodeSpan, Link etc.
type InlineParser interface {
	// Trigger returns a list of characters that trigger the Parse method of
	// this parser.
	// Trigger characters must be a punctuation character or a halfspace.
	// A halfspace triggers this parser when the character is any space
	// character or when the position is the head of a line.
	Trigger() []byte
	// Parse parses the given block into an inline node.
	//
	// Parse can parse beyond the current line.
	// If Parse has been able to parse the current line, it must advance the
	// reader position by the consumed byte length.
	Parse(parent ast.Node, block text.Reader, pc Context) ast.Node
}
// A CloseBlocker interface is a callback that will be called when a block
// is closed during inline parsing.
type CloseBlocker interface {
	// CloseBlock will be called when a block is closed.
	CloseBlock(parent ast.Node, block text.Reader, pc Context)
}
// A ParagraphTransformer transforms parsed Paragraph nodes.
// For example, link references are searched for in parsed Paragraphs.
type ParagraphTransformer interface {
	// Transform transforms the given paragraph.
	Transform(node *ast.Paragraph, reader text.Reader, pc Context)
}
// ASTTransformer transforms the AST of an entire Markdown document.
type ASTTransformer interface {
	// Transform transforms the given AST tree.
	Transform(node *ast.Document, reader text.Reader, pc Context)
}
  466. // DefaultBlockParsers returns a new list of default BlockParsers.
  467. // Priorities of default BlockParsers are:
  468. //
  469. // SetextHeadingParser, 100
  470. // ThematicBreakParser, 200
  471. // ListParser, 300
  472. // ListItemParser, 400
  473. // CodeBlockParser, 500
  474. // ATXHeadingParser, 600
  475. // FencedCodeBlockParser, 700
  476. // BlockquoteParser, 800
  477. // HTMLBlockParser, 900
  478. // ParagraphParser, 1000
  479. func DefaultBlockParsers() []util.PrioritizedValue {
  480. return []util.PrioritizedValue{
  481. util.Prioritized(NewSetextHeadingParser(), 100),
  482. util.Prioritized(NewThematicBreakParser(), 200),
  483. util.Prioritized(NewListParser(), 300),
  484. util.Prioritized(NewListItemParser(), 400),
  485. util.Prioritized(NewCodeBlockParser(), 500),
  486. util.Prioritized(NewATXHeadingParser(), 600),
  487. util.Prioritized(NewFencedCodeBlockParser(), 700),
  488. util.Prioritized(NewBlockquoteParser(), 800),
  489. util.Prioritized(NewHTMLBlockParser(), 900),
  490. util.Prioritized(NewParagraphParser(), 1000),
  491. }
  492. }
  493. // DefaultInlineParsers returns a new list of default InlineParsers.
  494. // Priorities of default InlineParsers are:
  495. //
  496. // CodeSpanParser, 100
  497. // LinkParser, 200
  498. // AutoLinkParser, 300
  499. // RawHTMLParser, 400
  500. // EmphasisParser, 500
  501. func DefaultInlineParsers() []util.PrioritizedValue {
  502. return []util.PrioritizedValue{
  503. util.Prioritized(NewCodeSpanParser(), 100),
  504. util.Prioritized(NewLinkParser(), 200),
  505. util.Prioritized(NewAutoLinkParser(), 300),
  506. util.Prioritized(NewRawHTMLParser(), 400),
  507. util.Prioritized(NewEmphasisParser(), 500),
  508. }
  509. }
  510. // DefaultParagraphTransformers returns a new list of default ParagraphTransformers.
  511. // Priorities of default ParagraphTransformers are:
  512. //
  513. // LinkReferenceParagraphTransformer, 100
  514. func DefaultParagraphTransformers() []util.PrioritizedValue {
  515. return []util.PrioritizedValue{
  516. util.Prioritized(LinkReferenceParagraphTransformer, 100),
  517. }
  518. }
// A Block struct holds a node and the parser that corresponds to it.
type Block struct {
	// Node is a BlockNode.
	Node ast.Node
	// Parser is the BlockParser paired with Node; the parser's Close method
	// is invoked on Node when the block is closed.
	Parser BlockParser
}
// parser is the Parser implementation returned by NewParser.
//
// The lookup tables below are filled from config exactly once, on the first
// call to Parse.
type parser struct {
	options map[OptionName]interface{}
	// blockParsers is indexed by trigger byte.
	blockParsers [256][]BlockParser
	// freeBlockParsers holds the block parsers whose Trigger returns nil.
	freeBlockParsers []BlockParser
	// inlineParsers is indexed by trigger byte.
	inlineParsers [256][]InlineParser
	// closeBlockers holds the inline parsers that also implement CloseBlocker.
	closeBlockers         []CloseBlocker
	paragraphTransformers []ParagraphTransformer
	astTransformers       []ASTTransformer
	// escapedSpace is copied from Config.EscapedSpace during initialization.
	escapedSpace bool
	// config collects options until the first Parse and is nil afterwards.
	config *Config
	// initSync guards the one-time initialization in Parse.
	initSync sync.Once
}
  538. type withBlockParsers struct {
  539. value []util.PrioritizedValue
  540. }
  541. func (o *withBlockParsers) SetParserOption(c *Config) {
  542. c.BlockParsers = append(c.BlockParsers, o.value...)
  543. }
  544. // WithBlockParsers is a functional option that allow you to add
  545. // BlockParsers to the parser.
  546. func WithBlockParsers(bs ...util.PrioritizedValue) Option {
  547. return &withBlockParsers{bs}
  548. }
  549. type withInlineParsers struct {
  550. value []util.PrioritizedValue
  551. }
  552. func (o *withInlineParsers) SetParserOption(c *Config) {
  553. c.InlineParsers = append(c.InlineParsers, o.value...)
  554. }
  555. // WithInlineParsers is a functional option that allow you to add
  556. // InlineParsers to the parser.
  557. func WithInlineParsers(bs ...util.PrioritizedValue) Option {
  558. return &withInlineParsers{bs}
  559. }
  560. type withParagraphTransformers struct {
  561. value []util.PrioritizedValue
  562. }
  563. func (o *withParagraphTransformers) SetParserOption(c *Config) {
  564. c.ParagraphTransformers = append(c.ParagraphTransformers, o.value...)
  565. }
  566. // WithParagraphTransformers is a functional option that allow you to add
  567. // ParagraphTransformers to the parser.
  568. func WithParagraphTransformers(ps ...util.PrioritizedValue) Option {
  569. return &withParagraphTransformers{ps}
  570. }
  571. type withASTTransformers struct {
  572. value []util.PrioritizedValue
  573. }
  574. func (o *withASTTransformers) SetParserOption(c *Config) {
  575. c.ASTTransformers = append(c.ASTTransformers, o.value...)
  576. }
  577. // WithASTTransformers is a functional option that allow you to add
  578. // ASTTransformers to the parser.
  579. func WithASTTransformers(ps ...util.PrioritizedValue) Option {
  580. return &withASTTransformers{ps}
  581. }
  582. type withEscapedSpace struct {
  583. }
  584. func (o *withEscapedSpace) SetParserOption(c *Config) {
  585. c.EscapedSpace = true
  586. }
  587. // WithEscapedSpace is a functional option indicates that a '\' escaped half-space(0x20) should not trigger parsers.
  588. func WithEscapedSpace() Option {
  589. return &withEscapedSpace{}
  590. }
  591. type withOption struct {
  592. name OptionName
  593. value interface{}
  594. }
  595. func (o *withOption) SetParserOption(c *Config) {
  596. c.Options[o.name] = o.value
  597. }
  598. // WithOption is a functional option that allow you to set
  599. // an arbitrary option to the parser.
  600. func WithOption(name OptionName, value interface{}) Option {
  601. return &withOption{name, value}
  602. }
  603. // NewParser returns a new Parser with given options.
  604. func NewParser(options ...Option) Parser {
  605. config := NewConfig()
  606. for _, opt := range options {
  607. opt.SetParserOption(config)
  608. }
  609. p := &parser{
  610. options: map[OptionName]interface{}{},
  611. config: config,
  612. }
  613. return p
  614. }
  615. func (p *parser) AddOptions(opts ...Option) {
  616. for _, opt := range opts {
  617. opt.SetParserOption(p.config)
  618. }
  619. }
  620. func (p *parser) addBlockParser(v util.PrioritizedValue, options map[OptionName]interface{}) {
  621. bp, ok := v.Value.(BlockParser)
  622. if !ok {
  623. panic(fmt.Sprintf("%v is not a BlockParser", v.Value))
  624. }
  625. tcs := bp.Trigger()
  626. so, ok := v.Value.(SetOptioner)
  627. if ok {
  628. for oname, ovalue := range options {
  629. so.SetOption(oname, ovalue)
  630. }
  631. }
  632. if tcs == nil {
  633. p.freeBlockParsers = append(p.freeBlockParsers, bp)
  634. } else {
  635. for _, tc := range tcs {
  636. if p.blockParsers[tc] == nil {
  637. p.blockParsers[tc] = []BlockParser{}
  638. }
  639. p.blockParsers[tc] = append(p.blockParsers[tc], bp)
  640. }
  641. }
  642. }
  643. func (p *parser) addInlineParser(v util.PrioritizedValue, options map[OptionName]interface{}) {
  644. ip, ok := v.Value.(InlineParser)
  645. if !ok {
  646. panic(fmt.Sprintf("%v is not a InlineParser", v.Value))
  647. }
  648. tcs := ip.Trigger()
  649. so, ok := v.Value.(SetOptioner)
  650. if ok {
  651. for oname, ovalue := range options {
  652. so.SetOption(oname, ovalue)
  653. }
  654. }
  655. if cb, ok := ip.(CloseBlocker); ok {
  656. p.closeBlockers = append(p.closeBlockers, cb)
  657. }
  658. for _, tc := range tcs {
  659. if p.inlineParsers[tc] == nil {
  660. p.inlineParsers[tc] = []InlineParser{}
  661. }
  662. p.inlineParsers[tc] = append(p.inlineParsers[tc], ip)
  663. }
  664. }
  665. func (p *parser) addParagraphTransformer(v util.PrioritizedValue, options map[OptionName]interface{}) {
  666. pt, ok := v.Value.(ParagraphTransformer)
  667. if !ok {
  668. panic(fmt.Sprintf("%v is not a ParagraphTransformer", v.Value))
  669. }
  670. so, ok := v.Value.(SetOptioner)
  671. if ok {
  672. for oname, ovalue := range options {
  673. so.SetOption(oname, ovalue)
  674. }
  675. }
  676. p.paragraphTransformers = append(p.paragraphTransformers, pt)
  677. }
  678. func (p *parser) addASTTransformer(v util.PrioritizedValue, options map[OptionName]interface{}) {
  679. at, ok := v.Value.(ASTTransformer)
  680. if !ok {
  681. panic(fmt.Sprintf("%v is not a ASTTransformer", v.Value))
  682. }
  683. so, ok := v.Value.(SetOptioner)
  684. if ok {
  685. for oname, ovalue := range options {
  686. so.SetOption(oname, ovalue)
  687. }
  688. }
  689. p.astTransformers = append(p.astTransformers, at)
  690. }
  691. // A ParseConfig struct is a data structure that holds configuration of the Parser.Parse.
  692. type ParseConfig struct {
  693. Context Context
  694. }
  695. // A ParseOption is a functional option type for the Parser.Parse.
  696. type ParseOption func(c *ParseConfig)
  697. // WithContext is a functional option that allow you to override
  698. // a default context.
  699. func WithContext(context Context) ParseOption {
  700. return func(c *ParseConfig) {
  701. c.Context = context
  702. }
  703. }
// Parse parses the Markdown text provided by reader and returns the root
// document node.
//
// On the first call the parser is initialized from its Config (guarded by a
// sync.Once); later calls reuse the tables built then. If no Context is
// supplied through WithContext, a new one is created for this call.
func (p *parser) Parse(reader text.Reader, opts ...ParseOption) ast.Node {
	p.initSync.Do(func() {
		// Register block parsers in priority order.
		p.config.BlockParsers.Sort()
		for _, v := range p.config.BlockParsers {
			p.addBlockParser(v, p.config.Options)
		}
		// Every trigger byte that has dedicated parsers also gets the
		// trigger-less ("free") parsers appended after them.
		for i := range p.blockParsers {
			if p.blockParsers[i] != nil {
				p.blockParsers[i] = append(p.blockParsers[i], p.freeBlockParsers...)
			}
		}

		p.config.InlineParsers.Sort()
		for _, v := range p.config.InlineParsers {
			p.addInlineParser(v, p.config.Options)
		}
		p.config.ParagraphTransformers.Sort()
		for _, v := range p.config.ParagraphTransformers {
			p.addParagraphTransformer(v, p.config.Options)
		}
		p.config.ASTTransformers.Sort()
		for _, v := range p.config.ASTTransformers {
			p.addASTTransformer(v, p.config.Options)
		}
		p.escapedSpace = p.config.EscapedSpace
		// The configuration has been consumed; it is dropped here, which is
		// why AddOptions cannot be used after the first Parse.
		p.config = nil
	})
	c := &ParseConfig{}
	for _, opt := range opts {
		opt(c)
	}
	if c.Context == nil {
		c.Context = NewContext()
	}
	pc := c.Context
	root := ast.NewDocument()
	// Block pass over the whole input.
	p.parseBlocks(root, reader, pc)

	// Second pass: parseBlock is called for the nodes visited by walkBlock,
	// using a block reader over the same source (presumably the inline
	// parsing step -- confirm in parseBlock).
	blockReader := text.NewBlockReader(reader.Source(), nil)
	p.walkBlock(root, func(node ast.Node) {
		p.parseBlock(blockReader, node, pc)
	})
	// Finally every AST transformer sees the complete document, in
	// registration (priority-sorted) order.
	for _, at := range p.astTransformers {
		at.Transform(root, reader, pc)
	}
	// root.Dump(reader.Source(), 0)
	return root
}
  750. func (p *parser) transformParagraph(node *ast.Paragraph, reader text.Reader, pc Context) bool {
  751. for _, pt := range p.paragraphTransformers {
  752. pt.Transform(node, reader, pc)
  753. if node.Parent() == nil {
  754. return true
  755. }
  756. }
  757. return false
  758. }
  759. func (p *parser) closeBlocks(from, to int, reader text.Reader, pc Context) {
  760. blocks := pc.OpenedBlocks()
  761. for i := from; i >= to; i-- {
  762. node := blocks[i].Node
  763. paragraph, ok := node.(*ast.Paragraph)
  764. if ok && node.Parent() != nil {
  765. p.transformParagraph(paragraph, reader, pc)
  766. }
  767. if node.Parent() != nil { // closes only if node has not been transformed
  768. blocks[i].Parser.Close(blocks[i].Node, reader, pc)
  769. }
  770. }
  771. if from == len(blocks)-1 {
  772. blocks = blocks[0:to]
  773. } else {
  774. blocks = append(blocks[0:to], blocks[from+1:]...)
  775. }
  776. pc.SetOpenedBlocks(blocks)
  777. }
// blockOpenResult is the outcome reported by (*parser).openBlocks.
type blockOpenResult int

const (
	// paragraphContinuation means that no block was opened and the parser
	// of the last opened block accepted the current line as a continuation.
	paragraphContinuation blockOpenResult = iota + 1
	// newBlocksOpened means that at least one new block was opened.
	newBlocksOpened
	// noBlocksOpened means that no block was opened and the line is not a
	// paragraph continuation either.
	noBlocksOpened
)
// openBlocks tries to open new blocks under parent, starting at the current
// line of reader.
//
// blankLine is recorded on every node that gets opened (via
// SetBlankPreviousLines). The result is
//   - newBlocksOpened when at least one block was opened,
//   - paragraphContinuation when nothing was opened, the last opened block
//     is a paragraph and its parser accepted the line,
//   - noBlocksOpened otherwise.
func (p *parser) openBlocks(parent ast.Node, blankLine bool, reader text.Reader, pc Context) blockOpenResult {
	result := blockOpenResult(noBlocksOpened)
	// continuable records whether the last opened block is a paragraph that
	// the current line might continue.
	continuable := false
	lastBlock := pc.LastOpenedBlock()
	if lastBlock.Node != nil {
		continuable = ast.IsParagraph(lastBlock.Node)
	}
retry:
	var bps []BlockParser
	line, _ := reader.PeekLine()
	w, pos := util.IndentWidth(line, reader.LineOffset())
	// Publish the indentation of the current line to the block parsers;
	// -1 is used when the indent width covers the whole line.
	if w >= len(line) {
		pc.SetBlockOffset(-1)
		pc.SetBlockIndent(-1)
	} else {
		pc.SetBlockOffset(pos)
		pc.SetBlockIndent(w)
	}
	// No more input, or the line starts with a newline: nothing can be
	// opened here.
	if line == nil || line[0] == '\n' {
		goto continuable
	}
	// Select the candidate parsers by the byte at pos, falling back to the
	// free block parsers when no parser is registered for that byte.
	bps = p.freeBlockParsers
	if pos < len(line) {
		bps = p.blockParsers[line[pos]]
		if bps == nil {
			bps = p.freeBlockParsers
		}
	}
	if bps == nil {
		goto continuable
	}

	for _, bp := range bps {
		// While a paragraph is open and nothing has been opened on this
		// line yet, only parsers that can interrupt a paragraph may run.
		if continuable && result == noBlocksOpened && !bp.CanInterruptParagraph() {
			continue
		}
		// Indent widths above 3 are only offered to parsers that accept
		// indented lines.
		if w > 3 && !bp.CanAcceptIndentedLine() {
			continue
		}
		lastBlock = pc.LastOpenedBlock()
		last := lastBlock.Node
		node, state := bp.Open(parent, reader, pc)
		if node != nil {
			// Parser requires last node to be a paragraph.
			// With table extension:
			//
			//     0
			//     -:
			//     -
			//
			// '-' on the 3rd line looks like a Setext heading because the 1st
			// and 2nd lines are still a paragraph when the Setext heading
			// parser tries to parse the 3rd line.
			// But the 1st and 2nd lines are a table. Thus this paragraph will
			// be transformed by a paragraph transformer. So this text should
			// be converted to a table and an empty list.
			if state&RequireParagraph != 0 {
				if last == parent.LastChild() {
					// Opened paragraph may be transformed by ParagraphTransformers in
					// closeBlocks().
					lastBlock.Parser.Close(last, reader, pc)
					blocks := pc.OpenedBlocks()
					pc.SetOpenedBlocks(blocks[0 : len(blocks)-1])
					if p.transformParagraph(last.(*ast.Paragraph), reader, pc) {
						// Paragraph has been transformed.
						// So this parser is considered as failing.
						continuable = false
						goto retry
					}
				}
			}
			node.SetBlankPreviousLines(blankLine)
			// The previously opened node no longer has a parent, so its
			// entry (the last opened block) is closed before the new node
			// is attached.
			if last != nil && last.Parent() == nil {
				lastPos := len(pc.OpenedBlocks()) - 1
				p.closeBlocks(lastPos, lastPos, reader, pc)
			}
			parent.AppendChild(parent, node)
			result = newBlocksOpened
			be := Block{node, bp}
			pc.SetOpenedBlocks(append(pc.OpenedBlocks(), be))
			if state&HasChildren != 0 {
				parent = node
				goto retry // try child block
			}
			break // no children, can not open more blocks on this line
		}
	}

continuable:

	// Nothing was opened: ask the parser of the last opened block (a
	// paragraph) whether the line continues it.
	if result == noBlocksOpened && continuable {
		state := lastBlock.Parser.Continue(lastBlock.Node, reader, pc)
		if state&Continue != 0 {
			result = paragraphContinuation
		}
	}
	return result
}
// lineStat records, for one line and one opened block, whether the line was
// blank. parseBlocks collects these entries and isBlankLine queries them.
type lineStat struct {
	// lineNum is the line number as reported by the reader's Position.
	lineNum int
	// level is the index of the opened block the entry was recorded for.
	level int
	// isBlank tells whether the line was blank.
	isBlank bool
}
  884. func isBlankLine(lineNum, level int, stats []lineStat) bool {
  885. ret := true
  886. for i := len(stats) - 1 - level; i >= 0; i-- {
  887. ret = false
  888. s := stats[i]
  889. if s.lineNum == lineNum {
  890. if s.level < level && s.isBlank {
  891. return true
  892. } else if s.level == level {
  893. return s.isBlank
  894. }
  895. }
  896. if s.lineNum < lineNum {
  897. return ret
  898. }
  899. }
  900. return ret
  901. }
// parseBlocks builds the block structure of the document read from reader
// as children of parent.
//
// The outer loop handles runs of lines separated by blank lines: it skips
// the blank lines and tries to open new blocks on the next line. The inner
// loop then feeds the following lines, one at a time, to the opened blocks
// until none of them is open anymore. The function returns when the reader
// is exhausted or when no new block can be opened.
func (p *parser) parseBlocks(parent ast.Node, reader text.Reader, pc Context) {
	pc.SetOpenedBlocks([]Block{})
	// blankLines collects per-line, per-level blank information that
	// isBlankLine consults to fill in "blank previous lines" for new blocks.
	blankLines := make([]lineStat, 0, 128)
	isBlank := false
	for { // process blocks separated by blank lines
		_, lines, ok := reader.SkipBlankLines()
		if !ok {
			return
		}
		lineNum, _ := reader.Position()
		if lines != 0 {
			// Blank lines were skipped: restart the statistics and mark
			// the previous line as blank for every currently opened block.
			blankLines = blankLines[0:0]
			l := len(pc.OpenedBlocks())
			for i := 0; i < l; i++ {
				blankLines = append(blankLines, lineStat{lineNum - 1, i, lines != 0})
			}
		}
		isBlank = isBlankLine(lineNum-1, 0, blankLines)
		// first, we try to open blocks
		if p.openBlocks(parent, isBlank, reader, pc) != newBlocksOpened {
			return
		}
		reader.AdvanceLine()
		for { // process opened blocks line by line
			openedBlocks := pc.OpenedBlocks()
			l := len(openedBlocks)
			if l == 0 {
				break
			}
			lastIndex := l - 1
			for i := 0; i < l; i++ {
				be := openedBlocks[i]
				line, _ := reader.PeekLine()
				if line == nil {
					// End of input: close every block that is still open.
					p.closeBlocks(lastIndex, 0, reader, pc)
					reader.AdvanceLine()
					return
				}
				lineNum, _ := reader.Position()
				blankLines = append(blankLines, lineStat{lineNum, i, util.IsBlank(line)})
				// If node is a paragraph, p.openBlocks determines whether it is continuable.
				// So we do not process paragraphs here.
				if !ast.IsParagraph(be.Node) {
					state := be.Parser.Continue(be.Node, reader, pc)
					if state&Continue != 0 {
						// When current node is a container block and has no children,
						// we try to open new child nodes
						if state&HasChildren != 0 && i == lastIndex {
							isBlank = isBlankLine(lineNum-1, i, blankLines)
							p.openBlocks(be.Node, isBlank, reader, pc)
							break
						}
						continue
					}
				}
				// current node may be closed or lazy continuation
				isBlank = isBlankLine(lineNum-1, i, blankLines)
				// New blocks are opened under the block one level up, or
				// under parent for the outermost opened block.
				thisParent := parent
				if i != 0 {
					thisParent = openedBlocks[i-1].Node
				}
				lastNode := openedBlocks[lastIndex].Node
				result := p.openBlocks(thisParent, isBlank, reader, pc)
				if result != paragraphContinuation {
					// lastNode is a paragraph and was transformed by the paragraph
					// transformers.
					if openedBlocks[lastIndex].Node != lastNode {
						lastIndex--
					}
					// Close the blocks from the last one down to block i,
					// which was not continued.
					p.closeBlocks(lastIndex, i, reader, pc)
				}
				break
			}

			reader.AdvanceLine()
		}
	}
}
  979. func (p *parser) walkBlock(block ast.Node, cb func(node ast.Node)) {
  980. for c := block.FirstChild(); c != nil; c = c.NextSibling() {
  981. p.walkBlock(c, cb)
  982. }
  983. cb(block)
  984. }
// Bit flags describing how a line ends; parseBlock combines them while
// scanning a line.
const (
	// lineBreakHard marks a hard line break: the line ends with a
	// backslash or with two spaces before the newline.
	lineBreakHard uint8 = 1 << iota
	// lineBreakSoft marks a line that ends with a newline but is not a
	// hard line break.
	lineBreakSoft
	// lineBreakVisible is set together with lineBreakHard for the
	// backslash form; in that case trailing spaces of the text are kept.
	lineBreakVisible
)
// parseBlock parses the inline content of parent, reading the lines of
// parent through block.
//
// Raw nodes are skipped. Every line is scanned byte by byte; at the first
// byte of a line, at unescaped punctuation and at spaces the inline parsers
// registered for that byte are tried. Text between inline nodes is appended
// to parent as text segments, and the text at the end of a line carries the
// soft or hard line break flag derived from the line ending. When all lines
// are consumed, the pending delimiters are processed and the close blockers
// are invoked.
func (p *parser) parseBlock(block text.BlockReader, parent ast.Node, pc Context) {
	if parent.IsRaw() {
		return
	}
	// escaped is true when the previous byte was an unescaped backslash.
	escaped := false
	source := block.Source()
	block.Reset(parent.Lines())
	for {
	retry:
		line, _ := block.PeekLine()
		if line == nil {
			break
		}
		lineLength := len(line)
		var lineBreakFlags uint8 = 0
		hasNewLine := line[lineLength-1] == '\n'
		// Classify the line ending. For hard line breaks lineLength is
		// shortened so that the break marker itself is not scanned below.
		if ((lineLength >= 3 && line[lineLength-2] == '\\' && line[lineLength-3] != '\\') || (lineLength == 2 && line[lineLength-2] == '\\')) && hasNewLine { // ends with \\n
			lineLength -= 2
			lineBreakFlags |= lineBreakHard | lineBreakVisible
		} else if ((lineLength >= 4 && line[lineLength-3] == '\\' && line[lineLength-2] == '\r' && line[lineLength-4] != '\\') || (lineLength == 3 && line[lineLength-3] == '\\' && line[lineLength-2] == '\r')) && hasNewLine { // ends with \\r\n
			lineLength -= 3
			lineBreakFlags |= lineBreakHard | lineBreakVisible
		} else if lineLength >= 3 && line[lineLength-3] == ' ' && line[lineLength-2] == ' ' && hasNewLine { // ends with [space][space]\n
			lineLength -= 3
			lineBreakFlags |= lineBreakHard
		} else if lineLength >= 4 && line[lineLength-4] == ' ' && line[lineLength-3] == ' ' && line[lineLength-2] == '\r' && hasNewLine { // ends with [space][space]\r\n
			lineLength -= 4
			lineBreakFlags |= lineBreakHard
		} else if hasNewLine {
			// If the line ends with a newline character, but it is not a hardlineBreak, then it is a softLinebreak
			// If the line ends with a hardlineBreak, then it cannot end with a softLinebreak
			// See https://spec.commonmark.org/0.30/#soft-line-breaks
			lineBreakFlags |= lineBreakSoft
		}

		l, startPosition := block.Position()
		// n counts the bytes scanned since the reader was last advanced.
		n := 0
		for i := 0; i < lineLength; i++ {
			c := line[i]
			if c == '\n' {
				break
			}
			isSpace := util.IsSpace(c) && c != '\r' && c != '\n'
			isPunct := util.IsPunct(c)
			// Inline parsers are only tried at unescaped punctuation, at
			// spaces (unless escaped while escapedSpace is enabled) and at
			// the first byte of the line.
			if (isPunct && !escaped) || isSpace && !(escaped && p.escapedSpace) || i == 0 {
				// Spaces and a non-punctuation first byte are looked up
				// under the ' ' key.
				parserChar := c
				if isSpace || (i == 0 && !isPunct) {
					parserChar = ' '
				}
				ips := p.inlineParsers[parserChar]
				if ips != nil {
					block.Advance(n)
					n = 0
					savedLine, savedPosition := block.Position()
					if i != 0 {
						// Flush the plain text collected so far.
						_, currentPosition := block.Position()
						ast.MergeOrAppendTextSegment(parent, startPosition.Between(currentPosition))
						_, startPosition = block.Position()
					}
					var inlineNode ast.Node
					for _, ip := range ips {
						inlineNode = ip.Parse(parent, block, pc)
						if inlineNode != nil {
							break
						}
						// The parser did not produce a node: restore the
						// reader position for the next candidate.
						block.SetPosition(savedLine, savedPosition)
					}
					if inlineNode != nil {
						parent.AppendChild(parent, inlineNode)
						// Restart from the reader's current position.
						goto retry
					}
				}
			}
			if escaped {
				escaped = false
				n++
				continue
			}

			if c == '\\' {
				escaped = true
				n++
				continue
			}

			escaped = false
			n++
		}
		if n != 0 {
			block.Advance(n)
		}
		currentL, currentPosition := block.Position()
		// The reader is no longer on the line the scan started on, so
		// continue with the line it is on now.
		if l != currentL {
			continue
		}
		diff := startPosition.Between(currentPosition)
		var text *ast.Text
		// Trailing spaces are kept only in front of a visible (backslash)
		// hard line break.
		if lineBreakFlags&(lineBreakHard|lineBreakVisible) == lineBreakHard|lineBreakVisible {
			text = ast.NewTextSegment(diff)
		} else {
			text = ast.NewTextSegment(diff.TrimRightSpace(source))
		}
		text.SetSoftLineBreak(lineBreakFlags&lineBreakSoft != 0)
		text.SetHardLineBreak(lineBreakFlags&lineBreakHard != 0)
		parent.AppendChild(parent, text)
		block.AdvanceLine()
	}

	ProcessDelimiters(nil, pc)
	for _, ip := range p.closeBlockers {
		ip.CloseBlock(parent, block, pc)
	}
}