html.go 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019
// Package html implements a renderer that outputs HTML.
  2. package html
  3. import (
  4. "bytes"
  5. "fmt"
  6. "strconv"
  7. "unicode"
  8. "unicode/utf8"
  9. "github.com/yuin/goldmark/ast"
  10. "github.com/yuin/goldmark/renderer"
  11. "github.com/yuin/goldmark/util"
  12. )
// A Config struct has configurations for the HTML based renderers.
type Config struct {
	// Writer is the Writer used to emit textual content.
	Writer Writer
	// HardWraps makes soft line breaks render as <br> elements.
	HardWraps bool
	// EastAsianLineBreaks selects how soft line breaks next to east asian
	// characters are handled.
	EastAsianLineBreaks EastAsianLineBreaks
	// XHTML makes void elements (<hr>, <br>, <img>) render with a trailing " />".
	XHTML bool
	// Unsafe renders raw HTML and potentially dangerous URLs instead of
	// omitting them.
	Unsafe bool
}
  21. // NewConfig returns a new Config with defaults.
  22. func NewConfig() Config {
  23. return Config{
  24. Writer: DefaultWriter,
  25. HardWraps: false,
  26. EastAsianLineBreaks: EastAsianLineBreaksNone,
  27. XHTML: false,
  28. Unsafe: false,
  29. }
  30. }
  31. // SetOption implements renderer.NodeRenderer.SetOption.
  32. func (c *Config) SetOption(name renderer.OptionName, value interface{}) {
  33. switch name {
  34. case optHardWraps:
  35. c.HardWraps = value.(bool)
  36. case optEastAsianLineBreaks:
  37. c.EastAsianLineBreaks = value.(EastAsianLineBreaks)
  38. case optXHTML:
  39. c.XHTML = value.(bool)
  40. case optUnsafe:
  41. c.Unsafe = value.(bool)
  42. case optTextWriter:
  43. c.Writer = value.(Writer)
  44. }
  45. }
// An Option interface sets options for HTML based renderers.
type Option interface {
	// SetHTMLOption applies the option to the given Config.
	SetHTMLOption(*Config)
}
  50. // TextWriter is an option name used in WithWriter.
  51. const optTextWriter renderer.OptionName = "Writer"
  52. type withWriter struct {
  53. value Writer
  54. }
  55. func (o *withWriter) SetConfig(c *renderer.Config) {
  56. c.Options[optTextWriter] = o.value
  57. }
  58. func (o *withWriter) SetHTMLOption(c *Config) {
  59. c.Writer = o.value
  60. }
  61. // WithWriter is a functional option that allow you to set the given writer to
  62. // the renderer.
  63. func WithWriter(writer Writer) interface {
  64. renderer.Option
  65. Option
  66. } {
  67. return &withWriter{writer}
  68. }
  69. // HardWraps is an option name used in WithHardWraps.
  70. const optHardWraps renderer.OptionName = "HardWraps"
  71. type withHardWraps struct {
  72. }
  73. func (o *withHardWraps) SetConfig(c *renderer.Config) {
  74. c.Options[optHardWraps] = true
  75. }
  76. func (o *withHardWraps) SetHTMLOption(c *Config) {
  77. c.HardWraps = true
  78. }
  79. // WithHardWraps is a functional option that indicates whether softline breaks
  80. // should be rendered as '<br>'.
  81. func WithHardWraps() interface {
  82. renderer.Option
  83. Option
  84. } {
  85. return &withHardWraps{}
  86. }
// optEastAsianLineBreaks is the option name used by WithEastAsianLineBreaks.
const optEastAsianLineBreaks renderer.OptionName = "EastAsianLineBreaks"

// An EastAsianLineBreaks is a style of east asian line breaks.
type EastAsianLineBreaks int

const (
	// EastAsianLineBreaksNone renders line breaks as they are.
	EastAsianLineBreaksNone EastAsianLineBreaks = iota
	// EastAsianLineBreaksSimple follows east_asian_line_breaks in Pandoc.
	EastAsianLineBreaksSimple
	// EastAsianLineBreaksCSS3Draft follows CSS text level3 "Segment Break Transformation Rules" with some enhancements.
	EastAsianLineBreaksCSS3Draft
)
  99. func (b EastAsianLineBreaks) softLineBreak(thisLastRune rune, siblingFirstRune rune) bool {
  100. switch b {
  101. case EastAsianLineBreaksNone:
  102. return false
  103. case EastAsianLineBreaksSimple:
  104. return !(util.IsEastAsianWideRune(thisLastRune) && util.IsEastAsianWideRune(siblingFirstRune))
  105. case EastAsianLineBreaksCSS3Draft:
  106. return eastAsianLineBreaksCSS3DraftSoftLineBreak(thisLastRune, siblingFirstRune)
  107. }
  108. return false
  109. }
  110. func eastAsianLineBreaksCSS3DraftSoftLineBreak(thisLastRune rune, siblingFirstRune rune) bool {
  111. // Implements CSS text level3 Segment Break Transformation Rules with some enhancements.
  112. // References:
  113. // - https://www.w3.org/TR/2020/WD-css-text-3-20200429/#line-break-transform
  114. // - https://github.com/w3c/csswg-drafts/issues/5086
  115. // Rule1:
  116. // If the character immediately before or immediately after the segment break is
  117. // the zero-width space character (U+200B), then the break is removed, leaving behind the zero-width space.
  118. if thisLastRune == '\u200B' || siblingFirstRune == '\u200B' {
  119. return false
  120. }
  121. // Rule2:
  122. // Otherwise, if the East Asian Width property of both the character before and after the segment break is
  123. // F, W, or H (not A), and neither side is Hangul, then the segment break is removed.
  124. thisLastRuneEastAsianWidth := util.EastAsianWidth(thisLastRune)
  125. siblingFirstRuneEastAsianWidth := util.EastAsianWidth(siblingFirstRune)
  126. if (thisLastRuneEastAsianWidth == "F" ||
  127. thisLastRuneEastAsianWidth == "W" ||
  128. thisLastRuneEastAsianWidth == "H") &&
  129. (siblingFirstRuneEastAsianWidth == "F" ||
  130. siblingFirstRuneEastAsianWidth == "W" ||
  131. siblingFirstRuneEastAsianWidth == "H") {
  132. return unicode.Is(unicode.Hangul, thisLastRune) || unicode.Is(unicode.Hangul, siblingFirstRune)
  133. }
  134. // Rule3:
  135. // Otherwise, if either the character before or after the segment break belongs to
  136. // the space-discarding character set and it is a Unicode Punctuation (P*) or U+3000,
  137. // then the segment break is removed.
  138. if util.IsSpaceDiscardingUnicodeRune(thisLastRune) ||
  139. unicode.IsPunct(thisLastRune) ||
  140. thisLastRune == '\u3000' ||
  141. util.IsSpaceDiscardingUnicodeRune(siblingFirstRune) ||
  142. unicode.IsPunct(siblingFirstRune) ||
  143. siblingFirstRune == '\u3000' {
  144. return false
  145. }
  146. // Rule4:
  147. // Otherwise, the segment break is converted to a space (U+0020).
  148. return true
  149. }
  150. type withEastAsianLineBreaks struct {
  151. eastAsianLineBreaksStyle EastAsianLineBreaks
  152. }
  153. func (o *withEastAsianLineBreaks) SetConfig(c *renderer.Config) {
  154. c.Options[optEastAsianLineBreaks] = o.eastAsianLineBreaksStyle
  155. }
  156. func (o *withEastAsianLineBreaks) SetHTMLOption(c *Config) {
  157. c.EastAsianLineBreaks = o.eastAsianLineBreaksStyle
  158. }
  159. // WithEastAsianLineBreaks is a functional option that indicates whether softline breaks
  160. // between east asian wide characters should be ignored.
  161. func WithEastAsianLineBreaks(e EastAsianLineBreaks) interface {
  162. renderer.Option
  163. Option
  164. } {
  165. return &withEastAsianLineBreaks{e}
  166. }
  167. // XHTML is an option name used in WithXHTML.
  168. const optXHTML renderer.OptionName = "XHTML"
  169. type withXHTML struct {
  170. }
  171. func (o *withXHTML) SetConfig(c *renderer.Config) {
  172. c.Options[optXHTML] = true
  173. }
  174. func (o *withXHTML) SetHTMLOption(c *Config) {
  175. c.XHTML = true
  176. }
  177. // WithXHTML is a functional option indicates that nodes should be rendered in
  178. // xhtml instead of HTML5.
  179. func WithXHTML() interface {
  180. Option
  181. renderer.Option
  182. } {
  183. return &withXHTML{}
  184. }
  185. // Unsafe is an option name used in WithUnsafe.
  186. const optUnsafe renderer.OptionName = "Unsafe"
  187. type withUnsafe struct {
  188. }
  189. func (o *withUnsafe) SetConfig(c *renderer.Config) {
  190. c.Options[optUnsafe] = true
  191. }
  192. func (o *withUnsafe) SetHTMLOption(c *Config) {
  193. c.Unsafe = true
  194. }
  195. // WithUnsafe is a functional option that renders dangerous contents
  196. // (raw htmls and potentially dangerous links) as it is.
  197. func WithUnsafe() interface {
  198. renderer.Option
  199. Option
  200. } {
  201. return &withUnsafe{}
  202. }
// A Renderer struct is an implementation of renderer.NodeRenderer that renders
// nodes as (X)HTML.
type Renderer struct {
	// Config is embedded so that its fields (Writer, XHTML, Unsafe, ...) are
	// accessible directly on the Renderer.
	Config
}
  208. // NewRenderer returns a new Renderer with given options.
  209. func NewRenderer(opts ...Option) renderer.NodeRenderer {
  210. r := &Renderer{
  211. Config: NewConfig(),
  212. }
  213. for _, opt := range opts {
  214. opt.SetHTMLOption(&r.Config)
  215. }
  216. return r
  217. }
  218. // RegisterFuncs implements NodeRenderer.RegisterFuncs .
  219. func (r *Renderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
  220. // blocks
  221. reg.Register(ast.KindDocument, r.renderDocument)
  222. reg.Register(ast.KindHeading, r.renderHeading)
  223. reg.Register(ast.KindBlockquote, r.renderBlockquote)
  224. reg.Register(ast.KindCodeBlock, r.renderCodeBlock)
  225. reg.Register(ast.KindFencedCodeBlock, r.renderFencedCodeBlock)
  226. reg.Register(ast.KindHTMLBlock, r.renderHTMLBlock)
  227. reg.Register(ast.KindList, r.renderList)
  228. reg.Register(ast.KindListItem, r.renderListItem)
  229. reg.Register(ast.KindParagraph, r.renderParagraph)
  230. reg.Register(ast.KindTextBlock, r.renderTextBlock)
  231. reg.Register(ast.KindThematicBreak, r.renderThematicBreak)
  232. // inlines
  233. reg.Register(ast.KindAutoLink, r.renderAutoLink)
  234. reg.Register(ast.KindCodeSpan, r.renderCodeSpan)
  235. reg.Register(ast.KindEmphasis, r.renderEmphasis)
  236. reg.Register(ast.KindImage, r.renderImage)
  237. reg.Register(ast.KindLink, r.renderLink)
  238. reg.Register(ast.KindRawHTML, r.renderRawHTML)
  239. reg.Register(ast.KindText, r.renderText)
  240. reg.Register(ast.KindString, r.renderString)
  241. }
  242. func (r *Renderer) writeLines(w util.BufWriter, source []byte, n ast.Node) {
  243. l := n.Lines().Len()
  244. for i := 0; i < l; i++ {
  245. line := n.Lines().At(i)
  246. r.Writer.RawWrite(w, line.Value(source))
  247. }
  248. }
// GlobalAttributeFilter defines attribute names which any elements can have.
// The element-specific filters in this file are either this filter itself or
// an extension of it.
var GlobalAttributeFilter = util.NewBytesFilter(
	[]byte("accesskey"),
	[]byte("autocapitalize"),
	[]byte("autofocus"),
	[]byte("class"),
	[]byte("contenteditable"),
	[]byte("dir"),
	[]byte("draggable"),
	[]byte("enterkeyhint"),
	[]byte("hidden"),
	[]byte("id"),
	[]byte("inert"),
	[]byte("inputmode"),
	[]byte("is"),
	[]byte("itemid"),
	[]byte("itemprop"),
	[]byte("itemref"),
	[]byte("itemscope"),
	[]byte("itemtype"),
	[]byte("lang"),
	[]byte("part"),
	[]byte("role"),
	[]byte("slot"),
	[]byte("spellcheck"),
	[]byte("style"),
	[]byte("tabindex"),
	[]byte("title"),
	[]byte("translate"),
)
  279. func (r *Renderer) renderDocument(
  280. w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
  281. // nothing to do
  282. return ast.WalkContinue, nil
  283. }
  284. // HeadingAttributeFilter defines attribute names which heading elements can have.
  285. var HeadingAttributeFilter = GlobalAttributeFilter
  286. func (r *Renderer) renderHeading(
  287. w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
  288. n := node.(*ast.Heading)
  289. if entering {
  290. _, _ = w.WriteString("<h")
  291. _ = w.WriteByte("0123456"[n.Level])
  292. if n.Attributes() != nil {
  293. RenderAttributes(w, node, HeadingAttributeFilter)
  294. }
  295. _ = w.WriteByte('>')
  296. } else {
  297. _, _ = w.WriteString("</h")
  298. _ = w.WriteByte("0123456"[n.Level])
  299. _, _ = w.WriteString(">\n")
  300. }
  301. return ast.WalkContinue, nil
  302. }
  303. // BlockquoteAttributeFilter defines attribute names which blockquote elements can have.
  304. var BlockquoteAttributeFilter = GlobalAttributeFilter.Extend(
  305. []byte("cite"),
  306. )
  307. func (r *Renderer) renderBlockquote(
  308. w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
  309. if entering {
  310. if n.Attributes() != nil {
  311. _, _ = w.WriteString("<blockquote")
  312. RenderAttributes(w, n, BlockquoteAttributeFilter)
  313. _ = w.WriteByte('>')
  314. } else {
  315. _, _ = w.WriteString("<blockquote>\n")
  316. }
  317. } else {
  318. _, _ = w.WriteString("</blockquote>\n")
  319. }
  320. return ast.WalkContinue, nil
  321. }
  322. func (r *Renderer) renderCodeBlock(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
  323. if entering {
  324. _, _ = w.WriteString("<pre><code>")
  325. r.writeLines(w, source, n)
  326. } else {
  327. _, _ = w.WriteString("</code></pre>\n")
  328. }
  329. return ast.WalkContinue, nil
  330. }
  331. func (r *Renderer) renderFencedCodeBlock(
  332. w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
  333. n := node.(*ast.FencedCodeBlock)
  334. if entering {
  335. _, _ = w.WriteString("<pre><code")
  336. language := n.Language(source)
  337. if language != nil {
  338. _, _ = w.WriteString(" class=\"language-")
  339. r.Writer.Write(w, language)
  340. _, _ = w.WriteString("\"")
  341. }
  342. _ = w.WriteByte('>')
  343. r.writeLines(w, source, n)
  344. } else {
  345. _, _ = w.WriteString("</code></pre>\n")
  346. }
  347. return ast.WalkContinue, nil
  348. }
  349. func (r *Renderer) renderHTMLBlock(
  350. w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
  351. n := node.(*ast.HTMLBlock)
  352. if entering {
  353. if r.Unsafe {
  354. l := n.Lines().Len()
  355. for i := 0; i < l; i++ {
  356. line := n.Lines().At(i)
  357. r.Writer.SecureWrite(w, line.Value(source))
  358. }
  359. } else {
  360. _, _ = w.WriteString("<!-- raw HTML omitted -->\n")
  361. }
  362. } else {
  363. if n.HasClosure() {
  364. if r.Unsafe {
  365. closure := n.ClosureLine
  366. r.Writer.SecureWrite(w, closure.Value(source))
  367. } else {
  368. _, _ = w.WriteString("<!-- raw HTML omitted -->\n")
  369. }
  370. }
  371. }
  372. return ast.WalkContinue, nil
  373. }
  374. // ListAttributeFilter defines attribute names which list elements can have.
  375. var ListAttributeFilter = GlobalAttributeFilter.Extend(
  376. []byte("start"),
  377. []byte("reversed"),
  378. []byte("type"),
  379. )
  380. func (r *Renderer) renderList(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
  381. n := node.(*ast.List)
  382. tag := "ul"
  383. if n.IsOrdered() {
  384. tag = "ol"
  385. }
  386. if entering {
  387. _ = w.WriteByte('<')
  388. _, _ = w.WriteString(tag)
  389. if n.IsOrdered() && n.Start != 1 {
  390. fmt.Fprintf(w, " start=\"%d\"", n.Start)
  391. }
  392. if n.Attributes() != nil {
  393. RenderAttributes(w, n, ListAttributeFilter)
  394. }
  395. _, _ = w.WriteString(">\n")
  396. } else {
  397. _, _ = w.WriteString("</")
  398. _, _ = w.WriteString(tag)
  399. _, _ = w.WriteString(">\n")
  400. }
  401. return ast.WalkContinue, nil
  402. }
  403. // ListItemAttributeFilter defines attribute names which list item elements can have.
  404. var ListItemAttributeFilter = GlobalAttributeFilter.Extend(
  405. []byte("value"),
  406. )
  407. func (r *Renderer) renderListItem(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
  408. if entering {
  409. if n.Attributes() != nil {
  410. _, _ = w.WriteString("<li")
  411. RenderAttributes(w, n, ListItemAttributeFilter)
  412. _ = w.WriteByte('>')
  413. } else {
  414. _, _ = w.WriteString("<li>")
  415. }
  416. fc := n.FirstChild()
  417. if fc != nil {
  418. if _, ok := fc.(*ast.TextBlock); !ok {
  419. _ = w.WriteByte('\n')
  420. }
  421. }
  422. } else {
  423. _, _ = w.WriteString("</li>\n")
  424. }
  425. return ast.WalkContinue, nil
  426. }
  427. // ParagraphAttributeFilter defines attribute names which paragraph elements can have.
  428. var ParagraphAttributeFilter = GlobalAttributeFilter
  429. func (r *Renderer) renderParagraph(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
  430. if entering {
  431. if n.Attributes() != nil {
  432. _, _ = w.WriteString("<p")
  433. RenderAttributes(w, n, ParagraphAttributeFilter)
  434. _ = w.WriteByte('>')
  435. } else {
  436. _, _ = w.WriteString("<p>")
  437. }
  438. } else {
  439. _, _ = w.WriteString("</p>\n")
  440. }
  441. return ast.WalkContinue, nil
  442. }
  443. func (r *Renderer) renderTextBlock(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
  444. if !entering {
  445. if n.NextSibling() != nil && n.FirstChild() != nil {
  446. _ = w.WriteByte('\n')
  447. }
  448. }
  449. return ast.WalkContinue, nil
  450. }
  451. // ThematicAttributeFilter defines attribute names which hr elements can have.
  452. var ThematicAttributeFilter = GlobalAttributeFilter.Extend(
  453. []byte("align"), // [Deprecated]
  454. []byte("color"), // [Not Standardized]
  455. []byte("noshade"), // [Deprecated]
  456. []byte("size"), // [Deprecated]
  457. []byte("width"), // [Deprecated]
  458. )
  459. func (r *Renderer) renderThematicBreak(
  460. w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
  461. if !entering {
  462. return ast.WalkContinue, nil
  463. }
  464. _, _ = w.WriteString("<hr")
  465. if n.Attributes() != nil {
  466. RenderAttributes(w, n, ThematicAttributeFilter)
  467. }
  468. if r.XHTML {
  469. _, _ = w.WriteString(" />\n")
  470. } else {
  471. _, _ = w.WriteString(">\n")
  472. }
  473. return ast.WalkContinue, nil
  474. }
  475. // LinkAttributeFilter defines attribute names which link elements can have.
  476. var LinkAttributeFilter = GlobalAttributeFilter.Extend(
  477. []byte("download"),
  478. // []byte("href"),
  479. []byte("hreflang"),
  480. []byte("media"),
  481. []byte("ping"),
  482. []byte("referrerpolicy"),
  483. []byte("rel"),
  484. []byte("shape"),
  485. []byte("target"),
  486. )
  487. func (r *Renderer) renderAutoLink(
  488. w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
  489. n := node.(*ast.AutoLink)
  490. if !entering {
  491. return ast.WalkContinue, nil
  492. }
  493. _, _ = w.WriteString(`<a href="`)
  494. url := n.URL(source)
  495. label := n.Label(source)
  496. if n.AutoLinkType == ast.AutoLinkEmail && !bytes.HasPrefix(bytes.ToLower(url), []byte("mailto:")) {
  497. _, _ = w.WriteString("mailto:")
  498. }
  499. _, _ = w.Write(util.EscapeHTML(util.URLEscape(url, false)))
  500. if n.Attributes() != nil {
  501. _ = w.WriteByte('"')
  502. RenderAttributes(w, n, LinkAttributeFilter)
  503. _ = w.WriteByte('>')
  504. } else {
  505. _, _ = w.WriteString(`">`)
  506. }
  507. _, _ = w.Write(util.EscapeHTML(label))
  508. _, _ = w.WriteString(`</a>`)
  509. return ast.WalkContinue, nil
  510. }
  511. // CodeAttributeFilter defines attribute names which code elements can have.
  512. var CodeAttributeFilter = GlobalAttributeFilter
  513. func (r *Renderer) renderCodeSpan(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
  514. if entering {
  515. if n.Attributes() != nil {
  516. _, _ = w.WriteString("<code")
  517. RenderAttributes(w, n, CodeAttributeFilter)
  518. _ = w.WriteByte('>')
  519. } else {
  520. _, _ = w.WriteString("<code>")
  521. }
  522. for c := n.FirstChild(); c != nil; c = c.NextSibling() {
  523. segment := c.(*ast.Text).Segment
  524. value := segment.Value(source)
  525. if bytes.HasSuffix(value, []byte("\n")) {
  526. r.Writer.RawWrite(w, value[:len(value)-1])
  527. r.Writer.RawWrite(w, []byte(" "))
  528. } else {
  529. r.Writer.RawWrite(w, value)
  530. }
  531. }
  532. return ast.WalkSkipChildren, nil
  533. }
  534. _, _ = w.WriteString("</code>")
  535. return ast.WalkContinue, nil
  536. }
  537. // EmphasisAttributeFilter defines attribute names which emphasis elements can have.
  538. var EmphasisAttributeFilter = GlobalAttributeFilter
  539. func (r *Renderer) renderEmphasis(
  540. w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
  541. n := node.(*ast.Emphasis)
  542. tag := "em"
  543. if n.Level == 2 {
  544. tag = "strong"
  545. }
  546. if entering {
  547. _ = w.WriteByte('<')
  548. _, _ = w.WriteString(tag)
  549. if n.Attributes() != nil {
  550. RenderAttributes(w, n, EmphasisAttributeFilter)
  551. }
  552. _ = w.WriteByte('>')
  553. } else {
  554. _, _ = w.WriteString("</")
  555. _, _ = w.WriteString(tag)
  556. _ = w.WriteByte('>')
  557. }
  558. return ast.WalkContinue, nil
  559. }
  560. func (r *Renderer) renderLink(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
  561. n := node.(*ast.Link)
  562. if entering {
  563. _, _ = w.WriteString("<a href=\"")
  564. if r.Unsafe || !IsDangerousURL(n.Destination) {
  565. _, _ = w.Write(util.EscapeHTML(util.URLEscape(n.Destination, true)))
  566. }
  567. _ = w.WriteByte('"')
  568. if n.Title != nil {
  569. _, _ = w.WriteString(` title="`)
  570. r.Writer.Write(w, n.Title)
  571. _ = w.WriteByte('"')
  572. }
  573. if n.Attributes() != nil {
  574. RenderAttributes(w, n, LinkAttributeFilter)
  575. }
  576. _ = w.WriteByte('>')
  577. } else {
  578. _, _ = w.WriteString("</a>")
  579. }
  580. return ast.WalkContinue, nil
  581. }
  582. // ImageAttributeFilter defines attribute names which image elements can have.
  583. var ImageAttributeFilter = GlobalAttributeFilter.Extend(
  584. []byte("align"),
  585. []byte("border"),
  586. []byte("crossorigin"),
  587. []byte("decoding"),
  588. []byte("height"),
  589. []byte("importance"),
  590. []byte("intrinsicsize"),
  591. []byte("ismap"),
  592. []byte("loading"),
  593. []byte("referrerpolicy"),
  594. []byte("sizes"),
  595. []byte("srcset"),
  596. []byte("usemap"),
  597. []byte("width"),
  598. )
  599. func (r *Renderer) renderImage(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
  600. if !entering {
  601. return ast.WalkContinue, nil
  602. }
  603. n := node.(*ast.Image)
  604. _, _ = w.WriteString("<img src=\"")
  605. if r.Unsafe || !IsDangerousURL(n.Destination) {
  606. _, _ = w.Write(util.EscapeHTML(util.URLEscape(n.Destination, true)))
  607. }
  608. _, _ = w.WriteString(`" alt="`)
  609. _, _ = w.Write(nodeToHTMLText(n, source))
  610. _ = w.WriteByte('"')
  611. if n.Title != nil {
  612. _, _ = w.WriteString(` title="`)
  613. r.Writer.Write(w, n.Title)
  614. _ = w.WriteByte('"')
  615. }
  616. if n.Attributes() != nil {
  617. RenderAttributes(w, n, ImageAttributeFilter)
  618. }
  619. if r.XHTML {
  620. _, _ = w.WriteString(" />")
  621. } else {
  622. _, _ = w.WriteString(">")
  623. }
  624. return ast.WalkSkipChildren, nil
  625. }
  626. func (r *Renderer) renderRawHTML(
  627. w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
  628. if !entering {
  629. return ast.WalkSkipChildren, nil
  630. }
  631. if r.Unsafe {
  632. n := node.(*ast.RawHTML)
  633. l := n.Segments.Len()
  634. for i := 0; i < l; i++ {
  635. segment := n.Segments.At(i)
  636. _, _ = w.Write(segment.Value(source))
  637. }
  638. return ast.WalkSkipChildren, nil
  639. }
  640. _, _ = w.WriteString("<!-- raw HTML omitted -->")
  641. return ast.WalkSkipChildren, nil
  642. }
  643. func (r *Renderer) renderText(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
  644. if !entering {
  645. return ast.WalkContinue, nil
  646. }
  647. n := node.(*ast.Text)
  648. segment := n.Segment
  649. if n.IsRaw() {
  650. r.Writer.RawWrite(w, segment.Value(source))
  651. } else {
  652. value := segment.Value(source)
  653. r.Writer.Write(w, value)
  654. if n.HardLineBreak() || (n.SoftLineBreak() && r.HardWraps) {
  655. if r.XHTML {
  656. _, _ = w.WriteString("<br />\n")
  657. } else {
  658. _, _ = w.WriteString("<br>\n")
  659. }
  660. } else if n.SoftLineBreak() {
  661. if r.EastAsianLineBreaks != EastAsianLineBreaksNone && len(value) != 0 {
  662. sibling := node.NextSibling()
  663. if sibling != nil && sibling.Kind() == ast.KindText {
  664. if siblingText := sibling.(*ast.Text).Text(source); len(siblingText) != 0 {
  665. thisLastRune := util.ToRune(value, len(value)-1)
  666. siblingFirstRune, _ := utf8.DecodeRune(siblingText)
  667. if r.EastAsianLineBreaks.softLineBreak(thisLastRune, siblingFirstRune) {
  668. _ = w.WriteByte('\n')
  669. }
  670. }
  671. }
  672. } else {
  673. _ = w.WriteByte('\n')
  674. }
  675. }
  676. }
  677. return ast.WalkContinue, nil
  678. }
  679. func (r *Renderer) renderString(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
  680. if !entering {
  681. return ast.WalkContinue, nil
  682. }
  683. n := node.(*ast.String)
  684. if n.IsCode() {
  685. _, _ = w.Write(n.Value)
  686. } else {
  687. if n.IsRaw() {
  688. r.Writer.RawWrite(w, n.Value)
  689. } else {
  690. r.Writer.Write(w, n.Value)
  691. }
  692. }
  693. return ast.WalkContinue, nil
  694. }
  695. var dataPrefix = []byte("data-")
  696. // RenderAttributes renders given node's attributes.
  697. // You can specify attribute names to render by the filter.
  698. // If filter is nil, RenderAttributes renders all attributes.
  699. func RenderAttributes(w util.BufWriter, node ast.Node, filter util.BytesFilter) {
  700. for _, attr := range node.Attributes() {
  701. if filter != nil && !filter.Contains(attr.Name) {
  702. if !bytes.HasPrefix(attr.Name, dataPrefix) {
  703. continue
  704. }
  705. }
  706. _, _ = w.WriteString(" ")
  707. _, _ = w.Write(attr.Name)
  708. _, _ = w.WriteString(`="`)
  709. // TODO: convert numeric values to strings
  710. _, _ = w.Write(util.EscapeHTML(attr.Value.([]byte)))
  711. _ = w.WriteByte('"')
  712. }
  713. }
// A Writer interface writes textual contents to a writer.
// The renderers in this file call it through Config.Writer.
type Writer interface {
	// Write writes the given source to writer with resolving references and unescaping
	// backslash escaped characters.
	Write(writer util.BufWriter, source []byte)
	// RawWrite writes the given source to writer without resolving references and
	// unescaping backslash escaped characters.
	RawWrite(writer util.BufWriter, source []byte)
	// SecureWrite writes the given source to writer with replacing insecure characters.
	SecureWrite(writer util.BufWriter, source []byte)
}
  725. var replacementCharacter = []byte("\ufffd")
  726. // A WriterConfig struct has configurations for the HTML based writers.
  727. type WriterConfig struct {
  728. // EscapedSpace is an option that indicates that a '\' escaped half-space(0x20) should not be rendered.
  729. EscapedSpace bool
  730. }
  731. // A WriterOption interface sets options for HTML based writers.
  732. type WriterOption func(*WriterConfig)
  733. // WithEscapedSpace is a WriterOption indicates that a '\' escaped half-space(0x20) should not be rendered.
  734. func WithEscapedSpace() WriterOption {
  735. return func(c *WriterConfig) {
  736. c.EscapedSpace = true
  737. }
  738. }
  739. type defaultWriter struct {
  740. WriterConfig
  741. }
  742. // NewWriter returns a new Writer.
  743. func NewWriter(opts ...WriterOption) Writer {
  744. w := &defaultWriter{}
  745. for _, opt := range opts {
  746. opt(&w.WriterConfig)
  747. }
  748. return w
  749. }
  750. func escapeRune(writer util.BufWriter, r rune) {
  751. if r < 256 {
  752. v := util.EscapeHTMLByte(byte(r))
  753. if v != nil {
  754. _, _ = writer.Write(v)
  755. return
  756. }
  757. }
  758. _, _ = writer.WriteRune(util.ToValidRune(r))
  759. }
  760. func (d *defaultWriter) SecureWrite(writer util.BufWriter, source []byte) {
  761. n := 0
  762. l := len(source)
  763. for i := 0; i < l; i++ {
  764. if source[i] == '\u0000' {
  765. _, _ = writer.Write(source[i-n : i])
  766. n = 0
  767. _, _ = writer.Write(replacementCharacter)
  768. continue
  769. }
  770. n++
  771. }
  772. if n != 0 {
  773. _, _ = writer.Write(source[l-n:])
  774. }
  775. }
  776. func (d *defaultWriter) RawWrite(writer util.BufWriter, source []byte) {
  777. n := 0
  778. l := len(source)
  779. for i := 0; i < l; i++ {
  780. v := util.EscapeHTMLByte(source[i])
  781. if v != nil {
  782. _, _ = writer.Write(source[i-n : i])
  783. n = 0
  784. _, _ = writer.Write(v)
  785. continue
  786. }
  787. n++
  788. }
  789. if n != 0 {
  790. _, _ = writer.Write(source[l-n:])
  791. }
  792. }
// Write writes source to writer, resolving character references and removing
// the backslash of backslash escapes. All literal text is emitted through
// RawWrite, so it is HTML-escaped on the way out.
//
// While scanning, n marks the start of the text that has not been written
// yet and escaped records whether the previous byte was an unconsumed
// backslash. The loop index i is reassigned while a character reference is
// being read, so the order of the statements below matters.
func (d *defaultWriter) Write(writer util.BufWriter, source []byte) {
	escaped := false
	var ok bool
	limit := len(source)
	n := 0
	for i := 0; i < limit; i++ {
		c := source[i]
		if escaped {
			if util.IsPunct(c) {
				// Backslash followed by punctuation: flush the text before the
				// backslash and resume at the punctuation, dropping the backslash.
				d.RawWrite(writer, source[n:i-1])
				n = i
				escaped = false
				continue
			}
			if d.EscapedSpace && c == ' ' {
				// Backslash followed by a space with EscapedSpace set: drop both
				// the backslash and the space.
				d.RawWrite(writer, source[n:i-1])
				n = i + 1
				escaped = false
				continue
			}
		}
		if c == '\x00' {
			// A NUL byte is replaced by the replacement character.
			d.RawWrite(writer, source[n:i])
			d.RawWrite(writer, replacementCharacter)
			n = i + 1
			escaped = false
			continue
		}
		if c == '&' {
			// pos remembers the '&' so that the scan can be rewound when no
			// valid reference follows.
			pos := i
			next := i + 1
			if next < limit && source[next] == '#' {
				nnext := next + 1
				if nnext < limit {
					nc := source[nnext]
					// code point like #x22;
					// NOTE(review): && binds tighter than ||, so this reads
					// (nnext < limit && nc == 'x') || nc == 'X'. The enclosing
					// check already guarantees nnext < limit, so the result is
					// the same either way.
					if nnext < limit && nc == 'x' || nc == 'X' {
						start := nnext + 1
						i, ok = util.ReadWhile(source, [2]int{start, limit}, util.IsHexDecimal)
						// Accept at most 6 hex digits terminated by ';'.
						if ok && i < limit && source[i] == ';' && i-start < 7 {
							v, _ := strconv.ParseUint(util.BytesToReadOnlyString(source[start:i]), 16, 32)
							d.RawWrite(writer, source[n:pos])
							n = i + 1
							escapeRune(writer, rune(v))
							continue
						}
					} else if nc >= '0' && nc <= '9' {
						// code point like #1234;
						start := nnext
						i, ok = util.ReadWhile(source, [2]int{start, limit}, util.IsNumeric)
						// Accept at most 7 decimal digits terminated by ';'.
						if ok && i < limit && i-start < 8 && source[i] == ';' {
							v, _ := strconv.ParseUint(util.BytesToReadOnlyString(source[start:i]), 10, 32)
							d.RawWrite(writer, source[n:pos])
							n = i + 1
							escapeRune(writer, rune(v))
							continue
						}
					}
				}
			} else {
				start := next
				i, ok = util.ReadWhile(source, [2]int{start, limit}, util.IsAlphaNumeric)
				// entity reference
				if ok && i < limit && source[i] == ';' {
					name := util.BytesToReadOnlyString(source[start:i])
					entity, ok := util.LookUpHTML5EntityByName(name)
					if ok {
						d.RawWrite(writer, source[n:pos])
						n = i + 1
						d.RawWrite(writer, entity.Characters)
						continue
					}
				}
			}
			// Not a valid reference: rewind i to the '&' so that the scan
			// resumes with the byte after it.
			i = next - 1
		}
		if c == '\\' {
			// Remember the backslash; the next byte decides whether it is an escape.
			escaped = true
			continue
		}
		escaped = false
	}
	// Flush whatever is left after the last handled position.
	d.RawWrite(writer, source[n:])
}
  877. // DefaultWriter is a default instance of the Writer.
  878. var DefaultWriter = NewWriter()
  879. var bDataImage = []byte("data:image/")
  880. var bPng = []byte("png;")
  881. var bGif = []byte("gif;")
  882. var bJpeg = []byte("jpeg;")
  883. var bWebp = []byte("webp;")
  884. var bSvg = []byte("svg+xml;")
  885. var bJs = []byte("javascript:")
  886. var bVb = []byte("vbscript:")
  887. var bFile = []byte("file:")
  888. var bData = []byte("data:")
  889. func hasPrefix(s, prefix []byte) bool {
  890. return len(s) >= len(prefix) && bytes.Equal(bytes.ToLower(s[0:len(prefix)]), bytes.ToLower(prefix))
  891. }
  892. // IsDangerousURL returns true if the given url seems a potentially dangerous url,
  893. // otherwise false.
  894. func IsDangerousURL(url []byte) bool {
  895. if hasPrefix(url, bDataImage) && len(url) >= 11 {
  896. v := url[11:]
  897. if hasPrefix(v, bPng) || hasPrefix(v, bGif) ||
  898. hasPrefix(v, bJpeg) || hasPrefix(v, bWebp) ||
  899. hasPrefix(v, bSvg) {
  900. return false
  901. }
  902. return true
  903. }
  904. return hasPrefix(url, bJs) || hasPrefix(url, bVb) ||
  905. hasPrefix(url, bFile) || hasPrefix(url, bData)
  906. }
  907. func nodeToHTMLText(n ast.Node, source []byte) []byte {
  908. var buf bytes.Buffer
  909. for c := n.FirstChild(); c != nil; c = c.NextSibling() {
  910. if s, ok := c.(*ast.String); ok && s.IsCode() {
  911. buf.Write(s.Text(source))
  912. } else if !c.HasChildren() {
  913. buf.Write(util.EscapeHTML(c.Text(source)))
  914. if t, ok := c.(*ast.Text); ok && t.SoftLineBreak() {
  915. buf.WriteByte('\n')
  916. }
  917. } else {
  918. buf.Write(nodeToHTMLText(c, source))
  919. }
  920. }
  921. return buf.Bytes()
  922. }