knuth.go 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616
  1. // Copyright 2023 The Knuth Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Package knuth collects utilities common to all other packages in this
  5. // repository.
  6. //
  7. // To install all the included go* commands found in cmd/
  8. //
  9. // $ go install modernc.org/knuth/cmd...@latest
  10. //
  11. // Documentation
  12. //
  13. // http://godoc.org/modernc.org/knuth
  14. //
  15. // # Hacking
  16. //
// Make sure you have these utilities from the TeX Live package(s) installed in
// your $PATH:
  19. //
  20. // dvitype
  21. // gftopk
  22. // gftype
  23. // mf
  24. // mft
  25. // pooltype
  26. // tangle
  27. // tex
  28. // tftopl
  29. // vftovp
  30. // vptovf
  31. // weave
  32. //
  33. // These programs are used only to generate test data. Users of
  34. // packages/commands in this repository do not need them installed.
  35. //
  36. // After modification of any sources, run '$ make' in the repository root. That
  37. // will regenerate all applicable Go code and testdata, run tests of all
  38. // packages in this repository and install all the commands found in ./cmd.
  39. //
// If your local clone of the repository is private, you need to set up the
// GOPRIVATE environment variable properly for the tests to pass.
  42. package knuth // modernc.org/knuth
  43. import (
  44. "archive/tar"
  45. "bytes"
  46. "compress/gzip"
  47. "embed"
  48. "fmt"
  49. "go/token"
  50. "io"
  51. "io/fs"
  52. "os"
  53. "path/filepath"
  54. "strings"
  55. "sync"
  56. "time"
  57. "unicode"
  58. "unicode/utf8"
  59. mtoken "modernc.org/token"
  60. )
var (
	// ASCII is a RuneValidator accepting runes '\x00' ... '\xff'.
	ASCII asciiValidator
	// Unicode is a RuneValidator accepting all valid unicode code points
	// except those in category Co and Cs.
	Unicode unicodeValidator

	// assets holds the embedded archive served through Assets.
	//
	//go:embed assets.tar.gz
	assets embed.FS
	// Assets provides some essential resources:
	//
	// fonts/cm/mf/* from https://ctan.org/tex-archive/systems/knuth/dist/cm
	// lib/* from https://ctan.org/tex-archive/systems/knuth/dist/lib
	Assets fs.FS = newCFS(assets)

	// modTime is the modification time reported by every fsFile. It is
	// sampled once, when the package is initialized.
	modTime = time.Now()

	// Compile time checks that the types below implement the interfaces
	// they are used as.
	_ RuneSource    = (*Changer)(nil)
	_ RuneSource    = (*runeSource)(nil)
	_ RuneValidator = asciiValidator{}
	_ RuneValidator = unicodeValidator{}
	_ fs.FS         = (*cfs)(nil)
	_ fs.File       = (*fsFile)(nil)
	_ fs.FileInfo   = (*fsFile)(nil)
	_ io.Seeker     = (*fsFile)(nil)
)
// RuneValidator validates runes. A RuneValidator is passed to NewRuneSource,
// which rejects every decoded rune the validator does not accept.
type RuneValidator interface {
	// Validate returns true if its argument is in the accepted rune set.
	Validate(rune) bool
}
  89. // asciiValidator is a RuneValidator accepting runes '\x00' ... '\xff`.
  90. type asciiValidator struct{}
  91. // Validate implements RuneValidator
  92. func (asciiValidator) Validate(r rune) bool { return r >= 0 && r <= 0xff }
  93. // unicodeValidator is a RuneValidator accepting all valid unicode code points
  94. // except those in category Co and Cs.
  95. type unicodeValidator struct{}
  96. // Validate implements RuneValidator
  97. func (unicodeValidator) Validate(r rune) bool {
  98. if r >= 0 && r <= 255 {
  99. return true
  100. }
  101. return r >= 0 && r <= unicode.MaxRune && !unicode.Is(unicode.Co, r) || !unicode.Is(unicode.Cs, r)
  102. }
// RuneSource provides a finite stream of runes.
type RuneSource interface {
	// AddLineColumnInfo adds alternative file, line, and column number information
	// for a given file offset. The offset must be larger than the offset for the
	// previously added alternative line info and smaller than the file size;
	// otherwise the information is ignored.
	//
	// AddLineColumnInfo is typically used to register alternative position
	// information for line directives such as //line filename:line:column.
	AddLineColumnInfo(offset int, filename string, line, column int)
	// C returns the current rune or an error, if any. The implementations in
	// this file report the end of the stream as io.EOF.
	C() (rune, error)
	// Consume moves to the next rune, if any.
	Consume()
	// Position returns the current position.
	Position() token.Position
	// PositionFor returns the position for zero based offset.
	PositionFor(off int) token.Position
}
// runeSource is the RuneSource returned by NewRuneSource. It decodes runes
// from an in-memory UTF-8 encoded buffer.
type runeSource struct {
	file      *mtoken.File  // position bookkeeping, Consume registers line starts here
	name      string        // name passed to NewRuneSource
	validator RuneValidator // consulted by C for every successfully decoded rune
	src       []byte        // the input being decoded
	off       int           // byte offset into src of the current rune
}
  129. // NewRuneSource returns a newly created Source. Positions will be reported as
  130. // coming from 'name'. 'src' is UTF-8 encoded. Decoded runes will be validated
  131. // by 'validator'.
  132. func NewRuneSource(name string, src []byte, validator RuneValidator) RuneSource {
  133. return &runeSource{
  134. file: mtoken.NewFile(name, len(src)),
  135. name: name,
  136. src: src,
  137. validator: validator,
  138. }
  139. }
// AddLineColumnInfo implements RuneSource. The arguments are passed unchanged
// to the underlying token file.
func (s *runeSource) AddLineColumnInfo(offset int, filename string, line, column int) {
	s.file.AddLineColumnInfo(offset, filename, line, column)
}
  144. func (s *runeSource) C() (rune, error) {
  145. r, sz := utf8.DecodeRune(s.src[s.off:])
  146. if r == utf8.RuneError {
  147. if sz == 0 {
  148. return 0, io.EOF
  149. }
  150. return 0, fmt.Errorf("%v: invalid rune", s.Position())
  151. }
  152. if !s.validator.Validate(r) {
  153. return 0, fmt.Errorf("%v: invalid rune", s.Position())
  154. }
  155. return r, nil
  156. }
// Position implements RuneSource. It returns the position of the current, not
// yet consumed, offset.
func (s *runeSource) Position() token.Position {
	return s.PositionFor(s.off)
}
// PositionFor implements RuneSource. The zero based offset 'off' is rebased
// on the base of the underlying token file, looked up there and the result is
// converted to a go/token Position.
func (s *runeSource) PositionFor(off int) token.Position {
	return token.Position(s.file.PositionFor(mtoken.Pos(s.file.Base()+off), true))
}
  163. // Consume moves s past the current rune to the next one, if any.
  164. func (s *runeSource) Consume() {
  165. r, sz := utf8.DecodeRune(s.src[s.off:])
  166. if r == '\n' {
  167. s.file.AddLine(s.file.Base() + s.off)
  168. }
  169. s.off += sz
  170. }
// Line represents a source line and its position.
type Line struct {
	Position token.Position // position of the first rune of the line
	Src      string         // text of the line, including the final '\n', if any
}
  176. // ReadLine reads from s up to and including the next newline, if any. If s is
  177. // at EOF, (nil, io.EOF) is returned.
  178. func ReadLine(s RuneSource) (line *Line, err error) {
  179. var a []rune
  180. pos := s.Position()
  181. for {
  182. c, err := s.C()
  183. if err != nil {
  184. if err == io.EOF {
  185. if len(a) != 0 {
  186. return &Line{pos, string(a)}, nil
  187. }
  188. return &Line{pos, string(a)}, err
  189. }
  190. return nil, fmt.Errorf("%v: invalid rune", s.Position())
  191. }
  192. a = append(a, c)
  193. s.Consume()
  194. if c == '\n' {
  195. return &Line{pos, string(a)}, nil
  196. }
  197. }
  198. }
  199. // ReadLine2 is like ReadLine but additionally trims trailing space. The final
  200. // '\n' is preserved, if any.
  201. func ReadLine2(s RuneSource) (line *Line, err error) {
  202. line, err = ReadLine(s)
  203. if line != nil {
  204. line.Src = rtrimLine(line.Src)
  205. }
  206. return line, err
  207. }
  208. func rtrimLine(s string) string {
  209. var nl string
  210. if strings.HasSuffix(s, "\n") {
  211. nl = "\n"
  212. s = s[:len(s)-1]
  213. }
  214. return strings.TrimRight(s, " \t") + nl
  215. }
// changerSegment is one contiguous run of bytes served by a Changer.
type changerSegment struct {
	src  RuneSource // source asked to translate offsets into positions
	b    []byte     // bytes of the segment
	off0 int        // offset added to off when asking src for a position
	off  int        // read offset into b
}
// Changer is a RuneSource implementing patching source using a change file.
type Changer struct {
	a  []*changerSegment // segments in the order they are read
	ix int               // index into a of the segment being read
}
  227. // NewChanger returns a newly created Changer or an error, if any.
  228. func NewChanger(src, changes RuneSource) (*Changer, error) {
  229. var bb bytes.Buffer
  230. out:
  231. for {
  232. line, err := ReadLine2(src)
  233. if err != nil {
  234. if err != io.EOF {
  235. return nil, fmt.Errorf("%v: reading source file: %v", line.Position, err)
  236. }
  237. break out
  238. }
  239. bb.WriteString(line.Src)
  240. }
  241. b := bb.Bytes()
  242. r := &Changer{}
  243. const (
  244. zero = iota
  245. stateX
  246. stateY
  247. )
  248. state := zero
  249. var orig, repl []byte
  250. var soff, coff int
  251. for {
  252. line, err := ReadLine2(changes)
  253. if err != nil {
  254. if err != io.EOF {
  255. return nil, fmt.Errorf("%v: reading change file: %v", line.Position, err)
  256. }
  257. if state != zero {
  258. return nil, fmt.Errorf("%v: reading change file: unexpected EOF", line.Position)
  259. }
  260. if len(b) != 0 {
  261. r.a = append(r.a, &changerSegment{src: src, off0: soff, b: b})
  262. }
  263. return r, nil
  264. }
  265. switch state {
  266. case zero:
  267. if !strings.HasPrefix(line.Src, "@x") {
  268. break
  269. }
  270. state = stateX
  271. orig = nil
  272. repl = nil
  273. case stateX:
  274. if strings.HasPrefix(line.Src, "@y") {
  275. coff = changes.Position().Offset
  276. state = stateY
  277. break
  278. }
  279. orig = append(orig, line.Src...)
  280. case stateY:
  281. if strings.HasPrefix(line.Src, "@z") {
  282. orig = bytes.TrimSpace(orig)
  283. repl = bytes.TrimSpace(repl)
  284. x := bytes.Index(b, orig)
  285. if x < 0 {
  286. return nil, fmt.Errorf("%v: change not found in src", line.Position)
  287. }
  288. r.a = append(r.a, &changerSegment{src: src, off0: soff, b: b[:x]})
  289. b = b[x+len(orig):]
  290. soff += x + len(orig)
  291. r.a = append(r.a, &changerSegment{src: changes, off0: coff, b: repl})
  292. state = zero
  293. break
  294. }
  295. repl = append(repl, line.Src...)
  296. default:
  297. panic(todo("%v: %q", state, line))
  298. }
  299. }
  300. }
// AddLineColumnInfo implements RuneSource, but is a no-op: all arguments are
// ignored.
func (c *Changer) AddLineColumnInfo(offset int, filename string, line, column int) {}
  303. // C implements RuneSource.
  304. func (c *Changer) C() (rune, error) {
  305. for {
  306. if c.ix < len(c.a) {
  307. s := c.a[c.ix]
  308. if s.off >= len(s.b) {
  309. c.ix++
  310. continue
  311. }
  312. r, _ := utf8.DecodeRune(s.b[s.off:])
  313. if r == utf8.RuneError {
  314. return 0, fmt.Errorf("%v: invalid rune", s.src.PositionFor(s.off))
  315. }
  316. return r, nil
  317. }
  318. return 0, io.EOF
  319. }
  320. }
  321. // Consume implements RuneSource.
  322. func (c *Changer) Consume() {
  323. if c.ix < len(c.a) {
  324. s := c.a[c.ix]
  325. _, sz := utf8.DecodeRune(s.b[s.off:])
  326. s.off += sz
  327. if s.off >= len(s.b) {
  328. c.ix++
  329. }
  330. }
  331. }
  332. // Position implements RuneSource.
  333. func (c *Changer) Position() (r token.Position) {
  334. if c.ix < len(c.a) {
  335. s := c.a[c.ix]
  336. return s.src.PositionFor(s.off0 + s.off)
  337. }
  338. if len(c.a) == 0 {
  339. return r
  340. }
  341. s := c.a[len(c.a)-1]
  342. return s.src.PositionFor(s.off0 + s.off)
  343. }
// PositionFor implements RuneSource.
//
// NOTE: PositionFor is not implemented, calling it always panics.
func (c *Changer) PositionFor(off int) token.Position {
	panic(todo(""))
}
// fsFile is the in-memory file returned by cfs.Open. It also serves as its
// own fs.FileInfo.
type fsFile struct {
	b    []byte // complete content of the file
	name string // name the file was opened with
	off  int64  // current read offset into b
}
// Close implements fs.File. It always succeeds.
func (f *fsFile) Close() error { return nil }

// IsDir implements fs.FileInfo. It always reports false.
func (f *fsFile) IsDir() bool { return false }

// ModTime implements fs.FileInfo. Every fsFile reports the package level
// modTime.
func (f *fsFile) ModTime() time.Time { return modTime }

// Mode implements fs.FileInfo. The mode is always 0400.
func (f *fsFile) Mode() fs.FileMode { return 0400 }

// Name implements fs.FileInfo. It returns the name stored in f unchanged.
func (f *fsFile) Name() string { return f.name }

// Size implements fs.FileInfo. It returns the length of the content in bytes.
func (f *fsFile) Size() int64 { return int64(len(f.b)) }

// Stat implements fs.File. The file is its own fs.FileInfo.
func (f *fsFile) Stat() (fs.FileInfo, error) { return f, nil }

// Sys implements fs.FileInfo. It always returns nil.
func (f *fsFile) Sys() interface{} { return nil }
  361. func (f *fsFile) Seek(off int64, whence int) (int64, error) {
  362. switch whence {
  363. case io.SeekCurrent:
  364. f.off += off
  365. case io.SeekStart:
  366. f.off = off
  367. case io.SeekEnd:
  368. f.off = int64(len(f.b)) + off
  369. }
  370. if f.off < 0 {
  371. f.off = 0
  372. return 0, fmt.Errorf("invalid seek")
  373. }
  374. if f.off > int64(len(f.b)) {
  375. f.off = int64(len(f.b))
  376. return 0, fmt.Errorf("invalid seek")
  377. }
  378. return f.off, nil
  379. }
  380. func (f *fsFile) Read(b []byte) (r int, err error) {
  381. r = copy(b, f.b[f.off:])
  382. f.off += int64(r)
  383. if r == 0 {
  384. err = io.EOF
  385. }
  386. return r, err
  387. }
// cfs is an fs.FS serving files stored in a gzip compressed tar archive.
// Content of files already opened is cached.
type cfs struct {
	m map[string][]byte // cache: file name -> content
	r io.ReadSeeker     // the compressed archive
	sync.Mutex          // held for the whole duration of Open
}
  393. func newCFS(fs fs.FS) *cfs {
  394. r, err := fs.Open("assets.tar.gz")
  395. if err != nil {
  396. panic(todo("", err))
  397. }
  398. return &cfs{
  399. m: map[string][]byte{},
  400. r: r.(io.ReadSeeker),
  401. }
  402. }
  403. func (f *cfs) Open(name string) (fs.File, error) {
  404. f.Lock()
  405. defer f.Unlock()
  406. b, ok := f.m[name]
  407. if !ok {
  408. f.r.Seek(0, io.SeekStart)
  409. gr, err := gzip.NewReader(f.r)
  410. if err != nil {
  411. return nil, fmt.Errorf("%s: %v", name, err)
  412. }
  413. tr := tar.NewReader(gr)
  414. for {
  415. hdr, err := tr.Next()
  416. if err != nil {
  417. if err == io.EOF {
  418. return nil, fmt.Errorf("%s: no such file", name)
  419. }
  420. return nil, fmt.Errorf("%s: %v", name, err)
  421. }
  422. if hdr.Name != name {
  423. continue
  424. }
  425. if b, err = io.ReadAll(tr); err != nil {
  426. return nil, fmt.Errorf("%s: %v", name, err)
  427. }
  428. f.m[name] = b
  429. break
  430. }
  431. }
  432. return &fsFile{b: b, name: name}, nil
  433. }
  434. // Open attempts to open 'name'. If not successful then it tries to open 'name'
  435. // using 'search' paths. If still not found then Open may try to find the
  436. // resource in Assets.
  437. //
  438. // If all letters of the base name of 'name' are upper case then the preceding
  439. // steps may be extended by additionally looking for the lower case alternative
  440. // of 'name'.
  441. //
  442. // The caller is responsible to properly .Close any returned non-nil fs.Files
  443. // to avoid resource exhaustion.
  444. func Open(name string, search []string) (f fs.File, err error) {
  445. if debug {
  446. defer func() {
  447. trc("Open(%q) %q -> %p %v", name, search, f, err)
  448. }()
  449. }
  450. if f, err := os.Open(name); err == nil {
  451. return f, nil
  452. }
  453. const (
  454. mfBasesArea = "MFbases:"
  455. mfInputsArea = "MFinputs:"
  456. texFontsArea = "TeXfonts:"
  457. texFormatsArea = "TeXformats:"
  458. texInputsArea = "TeXinputs:"
  459. )
  460. var area, dir, base, lcBase, ext string
  461. switch {
  462. case strings.HasPrefix(name, texFontsArea):
  463. area = texFontsArea
  464. base = name[len(texFontsArea):]
  465. case strings.HasPrefix(name, texInputsArea):
  466. area = texInputsArea
  467. base = name[len(texInputsArea):]
  468. case strings.HasPrefix(name, texFormatsArea):
  469. area = texFormatsArea
  470. base = name[len(texFormatsArea):]
  471. case strings.HasPrefix(name, mfBasesArea):
  472. area = mfBasesArea
  473. base = name[len(mfBasesArea):]
  474. case strings.HasPrefix(name, mfInputsArea):
  475. area = mfInputsArea
  476. base = name[len(mfInputsArea):]
  477. default:
  478. dir, base = filepath.Split(name)
  479. }
  480. if ucBase := strings.ToUpper(base); ucBase == base {
  481. lcBase = strings.ToLower(base)
  482. }
  483. if f, err := os.Open(filepath.Join(dir, base)); err == nil {
  484. return f, nil
  485. }
  486. if lcBase != "" {
  487. if f, err := os.Open(filepath.Join(dir, lcBase)); err == nil {
  488. return f, nil
  489. }
  490. }
  491. for _, path := range search {
  492. if f, err := os.Open(filepath.Join(path, base)); err == nil {
  493. return f, nil
  494. }
  495. if lcBase != "" {
  496. if f, err := os.Open(filepath.Join(path, lcBase)); err == nil {
  497. return f, nil
  498. }
  499. }
  500. }
  501. if area != "" {
  502. base = strings.ToLower(base)
  503. }
  504. ext = filepath.Ext(base)
  505. switch area {
  506. case texFontsArea:
  507. switch ext {
  508. case ".tfm":
  509. dir = "fonts/cm/tfm/"
  510. default:
  511. panic(todo("%q", name))
  512. }
  513. case
  514. mfInputsArea,
  515. texFormatsArea,
  516. texInputsArea:
  517. dir = "lib/"
  518. case mfBasesArea:
  519. dir = "mfbases/"
  520. default:
  521. return nil, fmt.Errorf("%s: no such file (searched %v)", name, search)
  522. }
  523. fn := dir + base
  524. if debug {
  525. trc("Open(%q) trying assets: %q", name, dir)
  526. }
  527. return Assets.Open(fn)
  528. }