html.go 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932
  1. package html
  2. import (
  3. "bytes"
  4. "fmt"
  5. "strconv"
  6. "unicode/utf8"
  7. "github.com/yuin/goldmark/ast"
  8. "github.com/yuin/goldmark/renderer"
  9. "github.com/yuin/goldmark/util"
  10. )
  11. // A Config struct has configurations for the HTML based renderers.
  12. type Config struct {
  13. Writer Writer
  14. HardWraps bool
  15. EastAsianLineBreaks bool
  16. XHTML bool
  17. Unsafe bool
  18. }
  19. // NewConfig returns a new Config with defaults.
  20. func NewConfig() Config {
  21. return Config{
  22. Writer: DefaultWriter,
  23. HardWraps: false,
  24. EastAsianLineBreaks: false,
  25. XHTML: false,
  26. Unsafe: false,
  27. }
  28. }
  29. // SetOption implements renderer.NodeRenderer.SetOption.
  30. func (c *Config) SetOption(name renderer.OptionName, value interface{}) {
  31. switch name {
  32. case optHardWraps:
  33. c.HardWraps = value.(bool)
  34. case optEastAsianLineBreaks:
  35. c.EastAsianLineBreaks = value.(bool)
  36. case optXHTML:
  37. c.XHTML = value.(bool)
  38. case optUnsafe:
  39. c.Unsafe = value.(bool)
  40. case optTextWriter:
  41. c.Writer = value.(Writer)
  42. }
  43. }
  44. // An Option interface sets options for HTML based renderers.
  45. type Option interface {
  46. SetHTMLOption(*Config)
  47. }
  48. // TextWriter is an option name used in WithWriter.
  49. const optTextWriter renderer.OptionName = "Writer"
  50. type withWriter struct {
  51. value Writer
  52. }
  53. func (o *withWriter) SetConfig(c *renderer.Config) {
  54. c.Options[optTextWriter] = o.value
  55. }
  56. func (o *withWriter) SetHTMLOption(c *Config) {
  57. c.Writer = o.value
  58. }
  59. // WithWriter is a functional option that allow you to set the given writer to
  60. // the renderer.
  61. func WithWriter(writer Writer) interface {
  62. renderer.Option
  63. Option
  64. } {
  65. return &withWriter{writer}
  66. }
  67. // HardWraps is an option name used in WithHardWraps.
  68. const optHardWraps renderer.OptionName = "HardWraps"
  69. type withHardWraps struct {
  70. }
  71. func (o *withHardWraps) SetConfig(c *renderer.Config) {
  72. c.Options[optHardWraps] = true
  73. }
  74. func (o *withHardWraps) SetHTMLOption(c *Config) {
  75. c.HardWraps = true
  76. }
  77. // WithHardWraps is a functional option that indicates whether softline breaks
  78. // should be rendered as '<br>'.
  79. func WithHardWraps() interface {
  80. renderer.Option
  81. Option
  82. } {
  83. return &withHardWraps{}
  84. }
  85. // EastAsianLineBreaks is an option name used in WithEastAsianLineBreaks.
  86. const optEastAsianLineBreaks renderer.OptionName = "EastAsianLineBreaks"
  87. type withEastAsianLineBreaks struct {
  88. }
  89. func (o *withEastAsianLineBreaks) SetConfig(c *renderer.Config) {
  90. c.Options[optEastAsianLineBreaks] = true
  91. }
  92. func (o *withEastAsianLineBreaks) SetHTMLOption(c *Config) {
  93. c.EastAsianLineBreaks = true
  94. }
  95. // WithEastAsianLineBreaks is a functional option that indicates whether softline breaks
  96. // between east asian wide characters should be ignored.
  97. func WithEastAsianLineBreaks() interface {
  98. renderer.Option
  99. Option
  100. } {
  101. return &withEastAsianLineBreaks{}
  102. }
  103. // XHTML is an option name used in WithXHTML.
  104. const optXHTML renderer.OptionName = "XHTML"
  105. type withXHTML struct {
  106. }
  107. func (o *withXHTML) SetConfig(c *renderer.Config) {
  108. c.Options[optXHTML] = true
  109. }
  110. func (o *withXHTML) SetHTMLOption(c *Config) {
  111. c.XHTML = true
  112. }
  113. // WithXHTML is a functional option indicates that nodes should be rendered in
  114. // xhtml instead of HTML5.
  115. func WithXHTML() interface {
  116. Option
  117. renderer.Option
  118. } {
  119. return &withXHTML{}
  120. }
  121. // Unsafe is an option name used in WithUnsafe.
  122. const optUnsafe renderer.OptionName = "Unsafe"
  123. type withUnsafe struct {
  124. }
  125. func (o *withUnsafe) SetConfig(c *renderer.Config) {
  126. c.Options[optUnsafe] = true
  127. }
  128. func (o *withUnsafe) SetHTMLOption(c *Config) {
  129. c.Unsafe = true
  130. }
  131. // WithUnsafe is a functional option that renders dangerous contents
  132. // (raw htmls and potentially dangerous links) as it is.
  133. func WithUnsafe() interface {
  134. renderer.Option
  135. Option
  136. } {
  137. return &withUnsafe{}
  138. }
  139. // A Renderer struct is an implementation of renderer.NodeRenderer that renders
  140. // nodes as (X)HTML.
  141. type Renderer struct {
  142. Config
  143. }
  144. // NewRenderer returns a new Renderer with given options.
  145. func NewRenderer(opts ...Option) renderer.NodeRenderer {
  146. r := &Renderer{
  147. Config: NewConfig(),
  148. }
  149. for _, opt := range opts {
  150. opt.SetHTMLOption(&r.Config)
  151. }
  152. return r
  153. }
  154. // RegisterFuncs implements NodeRenderer.RegisterFuncs .
  155. func (r *Renderer) RegisterFuncs(reg renderer.NodeRendererFuncRegisterer) {
  156. // blocks
  157. reg.Register(ast.KindDocument, r.renderDocument)
  158. reg.Register(ast.KindHeading, r.renderHeading)
  159. reg.Register(ast.KindBlockquote, r.renderBlockquote)
  160. reg.Register(ast.KindCodeBlock, r.renderCodeBlock)
  161. reg.Register(ast.KindFencedCodeBlock, r.renderFencedCodeBlock)
  162. reg.Register(ast.KindHTMLBlock, r.renderHTMLBlock)
  163. reg.Register(ast.KindList, r.renderList)
  164. reg.Register(ast.KindListItem, r.renderListItem)
  165. reg.Register(ast.KindParagraph, r.renderParagraph)
  166. reg.Register(ast.KindTextBlock, r.renderTextBlock)
  167. reg.Register(ast.KindThematicBreak, r.renderThematicBreak)
  168. // inlines
  169. reg.Register(ast.KindAutoLink, r.renderAutoLink)
  170. reg.Register(ast.KindCodeSpan, r.renderCodeSpan)
  171. reg.Register(ast.KindEmphasis, r.renderEmphasis)
  172. reg.Register(ast.KindImage, r.renderImage)
  173. reg.Register(ast.KindLink, r.renderLink)
  174. reg.Register(ast.KindRawHTML, r.renderRawHTML)
  175. reg.Register(ast.KindText, r.renderText)
  176. reg.Register(ast.KindString, r.renderString)
  177. }
  178. func (r *Renderer) writeLines(w util.BufWriter, source []byte, n ast.Node) {
  179. l := n.Lines().Len()
  180. for i := 0; i < l; i++ {
  181. line := n.Lines().At(i)
  182. r.Writer.RawWrite(w, line.Value(source))
  183. }
  184. }
  185. // GlobalAttributeFilter defines attribute names which any elements can have.
  186. var GlobalAttributeFilter = util.NewBytesFilter(
  187. []byte("accesskey"),
  188. []byte("autocapitalize"),
  189. []byte("autofocus"),
  190. []byte("class"),
  191. []byte("contenteditable"),
  192. []byte("dir"),
  193. []byte("draggable"),
  194. []byte("enterkeyhint"),
  195. []byte("hidden"),
  196. []byte("id"),
  197. []byte("inert"),
  198. []byte("inputmode"),
  199. []byte("is"),
  200. []byte("itemid"),
  201. []byte("itemprop"),
  202. []byte("itemref"),
  203. []byte("itemscope"),
  204. []byte("itemtype"),
  205. []byte("lang"),
  206. []byte("part"),
  207. []byte("role"),
  208. []byte("slot"),
  209. []byte("spellcheck"),
  210. []byte("style"),
  211. []byte("tabindex"),
  212. []byte("title"),
  213. []byte("translate"),
  214. )
  215. func (r *Renderer) renderDocument(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
  216. // nothing to do
  217. return ast.WalkContinue, nil
  218. }
  219. // HeadingAttributeFilter defines attribute names which heading elements can have
  220. var HeadingAttributeFilter = GlobalAttributeFilter
  221. func (r *Renderer) renderHeading(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
  222. n := node.(*ast.Heading)
  223. if entering {
  224. _, _ = w.WriteString("<h")
  225. _ = w.WriteByte("0123456"[n.Level])
  226. if n.Attributes() != nil {
  227. RenderAttributes(w, node, HeadingAttributeFilter)
  228. }
  229. _ = w.WriteByte('>')
  230. } else {
  231. _, _ = w.WriteString("</h")
  232. _ = w.WriteByte("0123456"[n.Level])
  233. _, _ = w.WriteString(">\n")
  234. }
  235. return ast.WalkContinue, nil
  236. }
  237. // BlockquoteAttributeFilter defines attribute names which blockquote elements can have
  238. var BlockquoteAttributeFilter = GlobalAttributeFilter.Extend(
  239. []byte("cite"),
  240. )
  241. func (r *Renderer) renderBlockquote(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
  242. if entering {
  243. if n.Attributes() != nil {
  244. _, _ = w.WriteString("<blockquote")
  245. RenderAttributes(w, n, BlockquoteAttributeFilter)
  246. _ = w.WriteByte('>')
  247. } else {
  248. _, _ = w.WriteString("<blockquote>\n")
  249. }
  250. } else {
  251. _, _ = w.WriteString("</blockquote>\n")
  252. }
  253. return ast.WalkContinue, nil
  254. }
  255. func (r *Renderer) renderCodeBlock(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
  256. if entering {
  257. _, _ = w.WriteString("<pre><code>")
  258. r.writeLines(w, source, n)
  259. } else {
  260. _, _ = w.WriteString("</code></pre>\n")
  261. }
  262. return ast.WalkContinue, nil
  263. }
  264. func (r *Renderer) renderFencedCodeBlock(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
  265. n := node.(*ast.FencedCodeBlock)
  266. if entering {
  267. _, _ = w.WriteString("<pre><code")
  268. language := n.Language(source)
  269. if language != nil {
  270. _, _ = w.WriteString(" class=\"language-")
  271. r.Writer.Write(w, language)
  272. _, _ = w.WriteString("\"")
  273. }
  274. _ = w.WriteByte('>')
  275. r.writeLines(w, source, n)
  276. } else {
  277. _, _ = w.WriteString("</code></pre>\n")
  278. }
  279. return ast.WalkContinue, nil
  280. }
  281. func (r *Renderer) renderHTMLBlock(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
  282. n := node.(*ast.HTMLBlock)
  283. if entering {
  284. if r.Unsafe {
  285. l := n.Lines().Len()
  286. for i := 0; i < l; i++ {
  287. line := n.Lines().At(i)
  288. r.Writer.SecureWrite(w, line.Value(source))
  289. }
  290. } else {
  291. _, _ = w.WriteString("<!-- raw HTML omitted -->\n")
  292. }
  293. } else {
  294. if n.HasClosure() {
  295. if r.Unsafe {
  296. closure := n.ClosureLine
  297. r.Writer.SecureWrite(w, closure.Value(source))
  298. } else {
  299. _, _ = w.WriteString("<!-- raw HTML omitted -->\n")
  300. }
  301. }
  302. }
  303. return ast.WalkContinue, nil
  304. }
  305. // ListAttributeFilter defines attribute names which list elements can have.
  306. var ListAttributeFilter = GlobalAttributeFilter.Extend(
  307. []byte("start"),
  308. []byte("reversed"),
  309. []byte("type"),
  310. )
  311. func (r *Renderer) renderList(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
  312. n := node.(*ast.List)
  313. tag := "ul"
  314. if n.IsOrdered() {
  315. tag = "ol"
  316. }
  317. if entering {
  318. _ = w.WriteByte('<')
  319. _, _ = w.WriteString(tag)
  320. if n.IsOrdered() && n.Start != 1 {
  321. fmt.Fprintf(w, " start=\"%d\"", n.Start)
  322. }
  323. if n.Attributes() != nil {
  324. RenderAttributes(w, n, ListAttributeFilter)
  325. }
  326. _, _ = w.WriteString(">\n")
  327. } else {
  328. _, _ = w.WriteString("</")
  329. _, _ = w.WriteString(tag)
  330. _, _ = w.WriteString(">\n")
  331. }
  332. return ast.WalkContinue, nil
  333. }
  334. // ListItemAttributeFilter defines attribute names which list item elements can have.
  335. var ListItemAttributeFilter = GlobalAttributeFilter.Extend(
  336. []byte("value"),
  337. )
  338. func (r *Renderer) renderListItem(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
  339. if entering {
  340. if n.Attributes() != nil {
  341. _, _ = w.WriteString("<li")
  342. RenderAttributes(w, n, ListItemAttributeFilter)
  343. _ = w.WriteByte('>')
  344. } else {
  345. _, _ = w.WriteString("<li>")
  346. }
  347. fc := n.FirstChild()
  348. if fc != nil {
  349. if _, ok := fc.(*ast.TextBlock); !ok {
  350. _ = w.WriteByte('\n')
  351. }
  352. }
  353. } else {
  354. _, _ = w.WriteString("</li>\n")
  355. }
  356. return ast.WalkContinue, nil
  357. }
  358. // ParagraphAttributeFilter defines attribute names which paragraph elements can have.
  359. var ParagraphAttributeFilter = GlobalAttributeFilter
  360. func (r *Renderer) renderParagraph(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
  361. if entering {
  362. if n.Attributes() != nil {
  363. _, _ = w.WriteString("<p")
  364. RenderAttributes(w, n, ParagraphAttributeFilter)
  365. _ = w.WriteByte('>')
  366. } else {
  367. _, _ = w.WriteString("<p>")
  368. }
  369. } else {
  370. _, _ = w.WriteString("</p>\n")
  371. }
  372. return ast.WalkContinue, nil
  373. }
  374. func (r *Renderer) renderTextBlock(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
  375. if !entering {
  376. if _, ok := n.NextSibling().(ast.Node); ok && n.FirstChild() != nil {
  377. _ = w.WriteByte('\n')
  378. }
  379. }
  380. return ast.WalkContinue, nil
  381. }
  382. // ThematicAttributeFilter defines attribute names which hr elements can have.
  383. var ThematicAttributeFilter = GlobalAttributeFilter.Extend(
  384. []byte("align"), // [Deprecated]
  385. []byte("color"), // [Not Standardized]
  386. []byte("noshade"), // [Deprecated]
  387. []byte("size"), // [Deprecated]
  388. []byte("width"), // [Deprecated]
  389. )
  390. func (r *Renderer) renderThematicBreak(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
  391. if !entering {
  392. return ast.WalkContinue, nil
  393. }
  394. _, _ = w.WriteString("<hr")
  395. if n.Attributes() != nil {
  396. RenderAttributes(w, n, ThematicAttributeFilter)
  397. }
  398. if r.XHTML {
  399. _, _ = w.WriteString(" />\n")
  400. } else {
  401. _, _ = w.WriteString(">\n")
  402. }
  403. return ast.WalkContinue, nil
  404. }
  405. // LinkAttributeFilter defines attribute names which link elements can have.
  406. var LinkAttributeFilter = GlobalAttributeFilter.Extend(
  407. []byte("download"),
  408. // []byte("href"),
  409. []byte("hreflang"),
  410. []byte("media"),
  411. []byte("ping"),
  412. []byte("referrerpolicy"),
  413. []byte("rel"),
  414. []byte("shape"),
  415. []byte("target"),
  416. )
  417. func (r *Renderer) renderAutoLink(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
  418. n := node.(*ast.AutoLink)
  419. if !entering {
  420. return ast.WalkContinue, nil
  421. }
  422. _, _ = w.WriteString(`<a href="`)
  423. url := n.URL(source)
  424. label := n.Label(source)
  425. if n.AutoLinkType == ast.AutoLinkEmail && !bytes.HasPrefix(bytes.ToLower(url), []byte("mailto:")) {
  426. _, _ = w.WriteString("mailto:")
  427. }
  428. _, _ = w.Write(util.EscapeHTML(util.URLEscape(url, false)))
  429. if n.Attributes() != nil {
  430. _ = w.WriteByte('"')
  431. RenderAttributes(w, n, LinkAttributeFilter)
  432. _ = w.WriteByte('>')
  433. } else {
  434. _, _ = w.WriteString(`">`)
  435. }
  436. _, _ = w.Write(util.EscapeHTML(label))
  437. _, _ = w.WriteString(`</a>`)
  438. return ast.WalkContinue, nil
  439. }
  440. // CodeAttributeFilter defines attribute names which code elements can have.
  441. var CodeAttributeFilter = GlobalAttributeFilter
  442. func (r *Renderer) renderCodeSpan(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
  443. if entering {
  444. if n.Attributes() != nil {
  445. _, _ = w.WriteString("<code")
  446. RenderAttributes(w, n, CodeAttributeFilter)
  447. _ = w.WriteByte('>')
  448. } else {
  449. _, _ = w.WriteString("<code>")
  450. }
  451. for c := n.FirstChild(); c != nil; c = c.NextSibling() {
  452. segment := c.(*ast.Text).Segment
  453. value := segment.Value(source)
  454. if bytes.HasSuffix(value, []byte("\n")) {
  455. r.Writer.RawWrite(w, value[:len(value)-1])
  456. r.Writer.RawWrite(w, []byte(" "))
  457. } else {
  458. r.Writer.RawWrite(w, value)
  459. }
  460. }
  461. return ast.WalkSkipChildren, nil
  462. }
  463. _, _ = w.WriteString("</code>")
  464. return ast.WalkContinue, nil
  465. }
  466. // EmphasisAttributeFilter defines attribute names which emphasis elements can have.
  467. var EmphasisAttributeFilter = GlobalAttributeFilter
  468. func (r *Renderer) renderEmphasis(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
  469. n := node.(*ast.Emphasis)
  470. tag := "em"
  471. if n.Level == 2 {
  472. tag = "strong"
  473. }
  474. if entering {
  475. _ = w.WriteByte('<')
  476. _, _ = w.WriteString(tag)
  477. if n.Attributes() != nil {
  478. RenderAttributes(w, n, EmphasisAttributeFilter)
  479. }
  480. _ = w.WriteByte('>')
  481. } else {
  482. _, _ = w.WriteString("</")
  483. _, _ = w.WriteString(tag)
  484. _ = w.WriteByte('>')
  485. }
  486. return ast.WalkContinue, nil
  487. }
  488. func (r *Renderer) renderLink(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
  489. n := node.(*ast.Link)
  490. if entering {
  491. _, _ = w.WriteString("<a href=\"")
  492. if r.Unsafe || !IsDangerousURL(n.Destination) {
  493. _, _ = w.Write(util.EscapeHTML(util.URLEscape(n.Destination, true)))
  494. }
  495. _ = w.WriteByte('"')
  496. if n.Title != nil {
  497. _, _ = w.WriteString(` title="`)
  498. r.Writer.Write(w, n.Title)
  499. _ = w.WriteByte('"')
  500. }
  501. if n.Attributes() != nil {
  502. RenderAttributes(w, n, LinkAttributeFilter)
  503. }
  504. _ = w.WriteByte('>')
  505. } else {
  506. _, _ = w.WriteString("</a>")
  507. }
  508. return ast.WalkContinue, nil
  509. }
  510. // ImageAttributeFilter defines attribute names which image elements can have.
  511. var ImageAttributeFilter = GlobalAttributeFilter.Extend(
  512. []byte("align"),
  513. []byte("border"),
  514. []byte("crossorigin"),
  515. []byte("decoding"),
  516. []byte("height"),
  517. []byte("importance"),
  518. []byte("intrinsicsize"),
  519. []byte("ismap"),
  520. []byte("loading"),
  521. []byte("referrerpolicy"),
  522. []byte("sizes"),
  523. []byte("srcset"),
  524. []byte("usemap"),
  525. []byte("width"),
  526. )
  527. func (r *Renderer) renderImage(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
  528. if !entering {
  529. return ast.WalkContinue, nil
  530. }
  531. n := node.(*ast.Image)
  532. _, _ = w.WriteString("<img src=\"")
  533. if r.Unsafe || !IsDangerousURL(n.Destination) {
  534. _, _ = w.Write(util.EscapeHTML(util.URLEscape(n.Destination, true)))
  535. }
  536. _, _ = w.WriteString(`" alt="`)
  537. _, _ = w.Write(nodeToHTMLText(n, source))
  538. _ = w.WriteByte('"')
  539. if n.Title != nil {
  540. _, _ = w.WriteString(` title="`)
  541. r.Writer.Write(w, n.Title)
  542. _ = w.WriteByte('"')
  543. }
  544. if n.Attributes() != nil {
  545. RenderAttributes(w, n, ImageAttributeFilter)
  546. }
  547. if r.XHTML {
  548. _, _ = w.WriteString(" />")
  549. } else {
  550. _, _ = w.WriteString(">")
  551. }
  552. return ast.WalkSkipChildren, nil
  553. }
  554. func (r *Renderer) renderRawHTML(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
  555. if !entering {
  556. return ast.WalkSkipChildren, nil
  557. }
  558. if r.Unsafe {
  559. n := node.(*ast.RawHTML)
  560. l := n.Segments.Len()
  561. for i := 0; i < l; i++ {
  562. segment := n.Segments.At(i)
  563. _, _ = w.Write(segment.Value(source))
  564. }
  565. return ast.WalkSkipChildren, nil
  566. }
  567. _, _ = w.WriteString("<!-- raw HTML omitted -->")
  568. return ast.WalkSkipChildren, nil
  569. }
  570. func (r *Renderer) renderText(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
  571. if !entering {
  572. return ast.WalkContinue, nil
  573. }
  574. n := node.(*ast.Text)
  575. segment := n.Segment
  576. if n.IsRaw() {
  577. r.Writer.RawWrite(w, segment.Value(source))
  578. } else {
  579. value := segment.Value(source)
  580. r.Writer.Write(w, value)
  581. if n.HardLineBreak() || (n.SoftLineBreak() && r.HardWraps) {
  582. if r.XHTML {
  583. _, _ = w.WriteString("<br />\n")
  584. } else {
  585. _, _ = w.WriteString("<br>\n")
  586. }
  587. } else if n.SoftLineBreak() {
  588. if r.EastAsianLineBreaks && len(value) != 0 {
  589. sibling := node.NextSibling()
  590. if sibling != nil && sibling.Kind() == ast.KindText {
  591. if siblingText := sibling.(*ast.Text).Text(source); len(siblingText) != 0 {
  592. thisLastRune := util.ToRune(value, len(value)-1)
  593. siblingFirstRune, _ := utf8.DecodeRune(siblingText)
  594. if !(util.IsEastAsianWideRune(thisLastRune) &&
  595. util.IsEastAsianWideRune(siblingFirstRune)) {
  596. _ = w.WriteByte('\n')
  597. }
  598. }
  599. }
  600. } else {
  601. _ = w.WriteByte('\n')
  602. }
  603. }
  604. }
  605. return ast.WalkContinue, nil
  606. }
  607. func (r *Renderer) renderString(w util.BufWriter, source []byte, node ast.Node, entering bool) (ast.WalkStatus, error) {
  608. if !entering {
  609. return ast.WalkContinue, nil
  610. }
  611. n := node.(*ast.String)
  612. if n.IsCode() {
  613. _, _ = w.Write(n.Value)
  614. } else {
  615. if n.IsRaw() {
  616. r.Writer.RawWrite(w, n.Value)
  617. } else {
  618. r.Writer.Write(w, n.Value)
  619. }
  620. }
  621. return ast.WalkContinue, nil
  622. }
  623. var dataPrefix = []byte("data-")
  624. // RenderAttributes renders given node's attributes.
  625. // You can specify attribute names to render by the filter.
  626. // If filter is nil, RenderAttributes renders all attributes.
  627. func RenderAttributes(w util.BufWriter, node ast.Node, filter util.BytesFilter) {
  628. for _, attr := range node.Attributes() {
  629. if filter != nil && !filter.Contains(attr.Name) {
  630. if !bytes.HasPrefix(attr.Name, dataPrefix) {
  631. continue
  632. }
  633. }
  634. _, _ = w.WriteString(" ")
  635. _, _ = w.Write(attr.Name)
  636. _, _ = w.WriteString(`="`)
  637. // TODO: convert numeric values to strings
  638. _, _ = w.Write(util.EscapeHTML(attr.Value.([]byte)))
  639. _ = w.WriteByte('"')
  640. }
  641. }
  642. // A Writer interface writes textual contents to a writer.
  643. type Writer interface {
  644. // Write writes the given source to writer with resolving references and unescaping
  645. // backslash escaped characters.
  646. Write(writer util.BufWriter, source []byte)
  647. // RawWrite writes the given source to writer without resolving references and
  648. // unescaping backslash escaped characters.
  649. RawWrite(writer util.BufWriter, source []byte)
  650. // SecureWrite writes the given source to writer with replacing insecure characters.
  651. SecureWrite(writer util.BufWriter, source []byte)
  652. }
  653. var replacementCharacter = []byte("\ufffd")
  654. // A WriterConfig struct has configurations for the HTML based writers.
  655. type WriterConfig struct {
  656. // EscapedSpace is an option that indicates that a '\' escaped half-space(0x20) should not be rendered.
  657. EscapedSpace bool
  658. }
  659. // A WriterOption interface sets options for HTML based writers.
  660. type WriterOption func(*WriterConfig)
  661. // WithEscapedSpace is a WriterOption indicates that a '\' escaped half-space(0x20) should not be rendered.
  662. func WithEscapedSpace() WriterOption {
  663. return func(c *WriterConfig) {
  664. c.EscapedSpace = true
  665. }
  666. }
  667. type defaultWriter struct {
  668. WriterConfig
  669. }
  670. // NewWriter returns a new Writer.
  671. func NewWriter(opts ...WriterOption) Writer {
  672. w := &defaultWriter{}
  673. for _, opt := range opts {
  674. opt(&w.WriterConfig)
  675. }
  676. return w
  677. }
  678. func escapeRune(writer util.BufWriter, r rune) {
  679. if r < 256 {
  680. v := util.EscapeHTMLByte(byte(r))
  681. if v != nil {
  682. _, _ = writer.Write(v)
  683. return
  684. }
  685. }
  686. _, _ = writer.WriteRune(util.ToValidRune(r))
  687. }
  688. func (d *defaultWriter) SecureWrite(writer util.BufWriter, source []byte) {
  689. n := 0
  690. l := len(source)
  691. for i := 0; i < l; i++ {
  692. if source[i] == '\u0000' {
  693. _, _ = writer.Write(source[i-n : i])
  694. n = 0
  695. _, _ = writer.Write(replacementCharacter)
  696. continue
  697. }
  698. n++
  699. }
  700. if n != 0 {
  701. _, _ = writer.Write(source[l-n:])
  702. }
  703. }
  704. func (d *defaultWriter) RawWrite(writer util.BufWriter, source []byte) {
  705. n := 0
  706. l := len(source)
  707. for i := 0; i < l; i++ {
  708. v := util.EscapeHTMLByte(source[i])
  709. if v != nil {
  710. _, _ = writer.Write(source[i-n : i])
  711. n = 0
  712. _, _ = writer.Write(v)
  713. continue
  714. }
  715. n++
  716. }
  717. if n != 0 {
  718. _, _ = writer.Write(source[l-n:])
  719. }
  720. }
  721. func (d *defaultWriter) Write(writer util.BufWriter, source []byte) {
  722. escaped := false
  723. var ok bool
  724. limit := len(source)
  725. n := 0
  726. for i := 0; i < limit; i++ {
  727. c := source[i]
  728. if escaped {
  729. if util.IsPunct(c) {
  730. d.RawWrite(writer, source[n:i-1])
  731. n = i
  732. escaped = false
  733. continue
  734. }
  735. if d.EscapedSpace && c == ' ' {
  736. d.RawWrite(writer, source[n:i-1])
  737. n = i + 1
  738. escaped = false
  739. continue
  740. }
  741. }
  742. if c == '\x00' {
  743. d.RawWrite(writer, source[n:i])
  744. d.RawWrite(writer, replacementCharacter)
  745. n = i + 1
  746. escaped = false
  747. continue
  748. }
  749. if c == '&' {
  750. pos := i
  751. next := i + 1
  752. if next < limit && source[next] == '#' {
  753. nnext := next + 1
  754. if nnext < limit {
  755. nc := source[nnext]
  756. // code point like #x22;
  757. if nnext < limit && nc == 'x' || nc == 'X' {
  758. start := nnext + 1
  759. i, ok = util.ReadWhile(source, [2]int{start, limit}, util.IsHexDecimal)
  760. if ok && i < limit && source[i] == ';' && i-start < 7 {
  761. v, _ := strconv.ParseUint(util.BytesToReadOnlyString(source[start:i]), 16, 32)
  762. d.RawWrite(writer, source[n:pos])
  763. n = i + 1
  764. escapeRune(writer, rune(v))
  765. continue
  766. }
  767. // code point like #1234;
  768. } else if nc >= '0' && nc <= '9' {
  769. start := nnext
  770. i, ok = util.ReadWhile(source, [2]int{start, limit}, util.IsNumeric)
  771. if ok && i < limit && i-start < 8 && source[i] == ';' {
  772. v, _ := strconv.ParseUint(util.BytesToReadOnlyString(source[start:i]), 10, 32)
  773. d.RawWrite(writer, source[n:pos])
  774. n = i + 1
  775. escapeRune(writer, rune(v))
  776. continue
  777. }
  778. }
  779. }
  780. } else {
  781. start := next
  782. i, ok = util.ReadWhile(source, [2]int{start, limit}, util.IsAlphaNumeric)
  783. // entity reference
  784. if ok && i < limit && source[i] == ';' {
  785. name := util.BytesToReadOnlyString(source[start:i])
  786. entity, ok := util.LookUpHTML5EntityByName(name)
  787. if ok {
  788. d.RawWrite(writer, source[n:pos])
  789. n = i + 1
  790. d.RawWrite(writer, entity.Characters)
  791. continue
  792. }
  793. }
  794. }
  795. i = next - 1
  796. }
  797. if c == '\\' {
  798. escaped = true
  799. continue
  800. }
  801. escaped = false
  802. }
  803. d.RawWrite(writer, source[n:])
  804. }
  805. // DefaultWriter is a default instance of the Writer.
  806. var DefaultWriter = NewWriter()
  807. var bDataImage = []byte("data:image/")
  808. var bPng = []byte("png;")
  809. var bGif = []byte("gif;")
  810. var bJpeg = []byte("jpeg;")
  811. var bWebp = []byte("webp;")
  812. var bSvg = []byte("svg+xml;")
  813. var bJs = []byte("javascript:")
  814. var bVb = []byte("vbscript:")
  815. var bFile = []byte("file:")
  816. var bData = []byte("data:")
  817. // IsDangerousURL returns true if the given url seems a potentially dangerous url,
  818. // otherwise false.
  819. func IsDangerousURL(url []byte) bool {
  820. if bytes.HasPrefix(url, bDataImage) && len(url) >= 11 {
  821. v := url[11:]
  822. if bytes.HasPrefix(v, bPng) || bytes.HasPrefix(v, bGif) ||
  823. bytes.HasPrefix(v, bJpeg) || bytes.HasPrefix(v, bWebp) ||
  824. bytes.HasPrefix(v, bSvg) {
  825. return false
  826. }
  827. return true
  828. }
  829. return bytes.HasPrefix(url, bJs) || bytes.HasPrefix(url, bVb) ||
  830. bytes.HasPrefix(url, bFile) || bytes.HasPrefix(url, bData)
  831. }
  832. func nodeToHTMLText(n ast.Node, source []byte) []byte {
  833. var buf bytes.Buffer
  834. for c := n.FirstChild(); c != nil; c = c.NextSibling() {
  835. if s, ok := c.(*ast.String); ok && s.IsCode() {
  836. buf.Write(s.Text(source))
  837. } else if !c.HasChildren() {
  838. buf.Write(util.EscapeHTML(c.Text(source)))
  839. } else {
  840. buf.Write(nodeToHTMLText(c, source))
  841. }
  842. }
  843. return buf.Bytes()
  844. }