| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616 |
- // Copyright 2023 The Knuth Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- // Package knuth collects utilities common to all other packages in this
- // repository.
- //
- // To install all the included go* commands found in cmd/
- //
- // $ go install modernc.org/knuth/cmd...@latest
- //
- // Documentation
- //
- // http://godoc.org/modernc.org/knuth
- //
- // # Hacking
- //
- // Make sure you have these utilities from the TeX Live package(s) installed in
- // your $PATH:
- //
- // dvitype
- // gftopk
- // gftype
- // mf
- // mft
- // pooltype
- // tangle
- // tex
- // tftopl
- // vftovp
- // vptovf
- // weave
- //
- // These programs are used only to generate test data. Users of
- // packages/commands in this repository do not need them installed.
- //
- // After modification of any sources, run '$ make' in the repository root. That
- // will regenerate all applicable Go code and testdata, run tests of all
- // packages in this repository and install all the commands found in ./cmd.
- //
- // If your local clone of the repository is private, you need to set up the
- // GOPRIVATE environment variable properly for the tests to pass.
- package knuth // modernc.org/knuth
- import (
- "archive/tar"
- "bytes"
- "compress/gzip"
- "embed"
- "fmt"
- "go/token"
- "io"
- "io/fs"
- "os"
- "path/filepath"
- "strings"
- "sync"
- "time"
- "unicode"
- "unicode/utf8"
- mtoken "modernc.org/token"
- )
- var (
- // ASCII is a RuneValidator accepting runes '\x00' ... '\xff`.
- ASCII asciiValidator
- // Unicode is a RuneValidator accepting all valid unicode code points
- // except those in category Co and Cs.
- Unicode unicodeValidator
- //go:embed assets.tar.gz
- assets embed.FS
- // Assets provides some essential resources:
- //
- // fonts/cm/mf/* from https://ctan.org/tex-archive/systems/knuth/dist/cm
- // lib/* from https://ctan.org/tex-archive/systems/knuth/dist/lib
- Assets fs.FS = newCFS(assets)
- modTime = time.Now()
- _ RuneSource = (*Changer)(nil)
- _ RuneSource = (*runeSource)(nil)
- _ RuneValidator = asciiValidator{}
- _ RuneValidator = unicodeValidator{}
- _ fs.FS = (*cfs)(nil)
- _ fs.File = (*fsFile)(nil)
- _ fs.FileInfo = (*fsFile)(nil)
- _ io.Seeker = (*fsFile)(nil)
- )
// RuneValidator decides which runes are acceptable in an input.
type RuneValidator interface {
	// Validate reports whether its argument belongs to the accepted rune
	// set.
	Validate(rune) bool
}
// asciiValidator is the RuneValidator behind ASCII. It accepts exactly the
// runes '\x00' ... '\xff'.
type asciiValidator struct{}

// Validate implements RuneValidator.
func (asciiValidator) Validate(r rune) bool {
	if r < 0 {
		return false
	}
	return r <= 0xff
}
// unicodeValidator is the RuneValidator behind Unicode. It accepts all valid
// unicode code points except those in category Co (private use) and Cs
// (surrogates).
type unicodeValidator struct{}

// Validate implements RuneValidator.
//
// It returns false for negative values, for values above unicode.MaxRune and
// for members of the Co and Cs categories; everything else is accepted.
func (unicodeValidator) Validate(r rune) bool {
	if r < 0 || r > unicode.MaxRune {
		return false
	}

	// Fast path: nothing in '\x00' ... '\xff' is in Co or Cs.
	if r <= 0xff {
		return true
	}

	// Both categories must be excluded. The two tests have to be joined by
	// &&; joining them with || accepts every rune that is outside of at least
	// one of the categories, i.e. every rune.
	return !unicode.Is(unicode.Co, r) && !unicode.Is(unicode.Cs, r)
}
// RuneSource provides a finite stream of runes together with their source
// positions.
type RuneSource interface {
	// C returns the current rune or an error, if any.
	C() (rune, error)

	// Consume moves to the next rune, if any.
	Consume()

	// Position returns the current position.
	Position() token.Position

	// PositionFor returns the position for zero based offset.
	PositionFor(off int) token.Position

	// AddLineColumnInfo adds alternative file, line, and column number
	// information for a given file offset. The offset must be larger than the
	// offset for the previously added alternative line info and smaller than
	// the file size; otherwise the information is ignored.
	//
	// AddLineColumnInfo is typically used to register alternative position
	// information for line directives such as //line filename:line:column.
	AddLineColumnInfo(offset int, filename string, line, column int)
}
- type runeSource struct {
- file *mtoken.File
- name string
- validator RuneValidator
- src []byte
- off int
- }
- // NewRuneSource returns a newly created Source. Positions will be reported as
- // coming from 'name'. 'src' is UTF-8 encoded. Decoded runes will be validated
- // by 'validator'.
- func NewRuneSource(name string, src []byte, validator RuneValidator) RuneSource {
- return &runeSource{
- file: mtoken.NewFile(name, len(src)),
- name: name,
- src: src,
- validator: validator,
- }
- }
- // AddLineColumnInfo implements RuneSource.
- func (s *runeSource) AddLineColumnInfo(offset int, filename string, line, column int) {
- s.file.AddLineColumnInfo(offset, filename, line, column)
- }
- func (s *runeSource) C() (rune, error) {
- r, sz := utf8.DecodeRune(s.src[s.off:])
- if r == utf8.RuneError {
- if sz == 0 {
- return 0, io.EOF
- }
- return 0, fmt.Errorf("%v: invalid rune", s.Position())
- }
- if !s.validator.Validate(r) {
- return 0, fmt.Errorf("%v: invalid rune", s.Position())
- }
- return r, nil
- }
- func (s *runeSource) Position() token.Position {
- return s.PositionFor(s.off)
- }
- func (s *runeSource) PositionFor(off int) token.Position {
- return token.Position(s.file.PositionFor(mtoken.Pos(s.file.Base()+off), true))
- }
- // Consume moves s past the current rune to the next one, if any.
- func (s *runeSource) Consume() {
- r, sz := utf8.DecodeRune(s.src[s.off:])
- if r == '\n' {
- s.file.AddLine(s.file.Base() + s.off)
- }
- s.off += sz
- }
// Line is a single source line paired with the position it starts at.
type Line struct {
	// Position is the position of the first rune of the line.
	Position token.Position
	// Src is the text of the line.
	Src string
}
- // ReadLine reads from s up to and including the next newline, if any. If s is
- // at EOF, (nil, io.EOF) is returned.
- func ReadLine(s RuneSource) (line *Line, err error) {
- var a []rune
- pos := s.Position()
- for {
- c, err := s.C()
- if err != nil {
- if err == io.EOF {
- if len(a) != 0 {
- return &Line{pos, string(a)}, nil
- }
- return &Line{pos, string(a)}, err
- }
- return nil, fmt.Errorf("%v: invalid rune", s.Position())
- }
- a = append(a, c)
- s.Consume()
- if c == '\n' {
- return &Line{pos, string(a)}, nil
- }
- }
- }
- // ReadLine2 is like ReadLine but additionally trims trailing space. The final
- // '\n' is preserved, if any.
- func ReadLine2(s RuneSource) (line *Line, err error) {
- line, err = ReadLine(s)
- if line != nil {
- line.Src = rtrimLine(line.Src)
- }
- return line, err
- }
// rtrimLine removes spaces and tabs from the end of s. If s ends with a
// newline, the blanks in front of that newline are removed and the newline
// itself is kept.
func rtrimLine(s string) string {
	body := strings.TrimSuffix(s, "\n")
	trimmed := strings.TrimRight(body, " \t")
	if len(body) != len(s) {
		// A newline was cut off above; put it back.
		return trimmed + "\n"
	}

	return trimmed
}
- type changerSegment struct {
- src RuneSource
- b []byte
- off0 int
- off int
- }
- // Changer is a RuneSource implementing patching source using a change file.
- type Changer struct {
- a []*changerSegment
- ix int
- }
- // NewChanger returns a newly created Changer or an error, if any.
- func NewChanger(src, changes RuneSource) (*Changer, error) {
- var bb bytes.Buffer
- out:
- for {
- line, err := ReadLine2(src)
- if err != nil {
- if err != io.EOF {
- return nil, fmt.Errorf("%v: reading source file: %v", line.Position, err)
- }
- break out
- }
- bb.WriteString(line.Src)
- }
- b := bb.Bytes()
- r := &Changer{}
- const (
- zero = iota
- stateX
- stateY
- )
- state := zero
- var orig, repl []byte
- var soff, coff int
- for {
- line, err := ReadLine2(changes)
- if err != nil {
- if err != io.EOF {
- return nil, fmt.Errorf("%v: reading change file: %v", line.Position, err)
- }
- if state != zero {
- return nil, fmt.Errorf("%v: reading change file: unexpected EOF", line.Position)
- }
- if len(b) != 0 {
- r.a = append(r.a, &changerSegment{src: src, off0: soff, b: b})
- }
- return r, nil
- }
- switch state {
- case zero:
- if !strings.HasPrefix(line.Src, "@x") {
- break
- }
- state = stateX
- orig = nil
- repl = nil
- case stateX:
- if strings.HasPrefix(line.Src, "@y") {
- coff = changes.Position().Offset
- state = stateY
- break
- }
- orig = append(orig, line.Src...)
- case stateY:
- if strings.HasPrefix(line.Src, "@z") {
- orig = bytes.TrimSpace(orig)
- repl = bytes.TrimSpace(repl)
- x := bytes.Index(b, orig)
- if x < 0 {
- return nil, fmt.Errorf("%v: change not found in src", line.Position)
- }
- r.a = append(r.a, &changerSegment{src: src, off0: soff, b: b[:x]})
- b = b[x+len(orig):]
- soff += x + len(orig)
- r.a = append(r.a, &changerSegment{src: changes, off0: coff, b: repl})
- state = zero
- break
- }
- repl = append(repl, line.Src...)
- default:
- panic(todo("%v: %q", state, line))
- }
- }
- }
- // AddLineColumnInfo implements RuneSource, but is a no-op.
- func (c *Changer) AddLineColumnInfo(offset int, filename string, line, column int) {}
- // C implements RuneSource.
- func (c *Changer) C() (rune, error) {
- for {
- if c.ix < len(c.a) {
- s := c.a[c.ix]
- if s.off >= len(s.b) {
- c.ix++
- continue
- }
- r, _ := utf8.DecodeRune(s.b[s.off:])
- if r == utf8.RuneError {
- return 0, fmt.Errorf("%v: invalid rune", s.src.PositionFor(s.off))
- }
- return r, nil
- }
- return 0, io.EOF
- }
- }
- // Consume implements RuneSource.
- func (c *Changer) Consume() {
- if c.ix < len(c.a) {
- s := c.a[c.ix]
- _, sz := utf8.DecodeRune(s.b[s.off:])
- s.off += sz
- if s.off >= len(s.b) {
- c.ix++
- }
- }
- }
- // Position implements RuneSource.
- func (c *Changer) Position() (r token.Position) {
- if c.ix < len(c.a) {
- s := c.a[c.ix]
- return s.src.PositionFor(s.off0 + s.off)
- }
- if len(c.a) == 0 {
- return r
- }
- s := c.a[len(c.a)-1]
- return s.src.PositionFor(s.off0 + s.off)
- }
- // PositionFor implements RuneSource.
- func (c *Changer) PositionFor(off int) token.Position {
- panic(todo(""))
- }
- type fsFile struct {
- b []byte
- name string
- off int64
- }
- func (f *fsFile) Close() error { return nil }
- func (f *fsFile) IsDir() bool { return false }
- func (f *fsFile) ModTime() time.Time { return modTime }
- func (f *fsFile) Mode() fs.FileMode { return 0400 }
- func (f *fsFile) Name() string { return f.name }
- func (f *fsFile) Size() int64 { return int64(len(f.b)) }
- func (f *fsFile) Stat() (fs.FileInfo, error) { return f, nil }
- func (f *fsFile) Sys() interface{} { return nil }
- func (f *fsFile) Seek(off int64, whence int) (int64, error) {
- switch whence {
- case io.SeekCurrent:
- f.off += off
- case io.SeekStart:
- f.off = off
- case io.SeekEnd:
- f.off = int64(len(f.b)) + off
- }
- if f.off < 0 {
- f.off = 0
- return 0, fmt.Errorf("invalid seek")
- }
- if f.off > int64(len(f.b)) {
- f.off = int64(len(f.b))
- return 0, fmt.Errorf("invalid seek")
- }
- return f.off, nil
- }
- func (f *fsFile) Read(b []byte) (r int, err error) {
- r = copy(b, f.b[f.off:])
- f.off += int64(r)
- if r == 0 {
- err = io.EOF
- }
- return r, err
- }
// cfs is a read-only file system backed by a gzip compressed tar archive.
// Files are extracted on first use and kept in memory afterwards.
type cfs struct {
	sync.Mutex // held by Open while it uses m and r

	m map[string][]byte // content of the files extracted so far, by name
	r io.ReadSeeker     // the compressed archive
}
- func newCFS(fs fs.FS) *cfs {
- r, err := fs.Open("assets.tar.gz")
- if err != nil {
- panic(todo("", err))
- }
- return &cfs{
- m: map[string][]byte{},
- r: r.(io.ReadSeeker),
- }
- }
- func (f *cfs) Open(name string) (fs.File, error) {
- f.Lock()
- defer f.Unlock()
- b, ok := f.m[name]
- if !ok {
- f.r.Seek(0, io.SeekStart)
- gr, err := gzip.NewReader(f.r)
- if err != nil {
- return nil, fmt.Errorf("%s: %v", name, err)
- }
- tr := tar.NewReader(gr)
- for {
- hdr, err := tr.Next()
- if err != nil {
- if err == io.EOF {
- return nil, fmt.Errorf("%s: no such file", name)
- }
- return nil, fmt.Errorf("%s: %v", name, err)
- }
- if hdr.Name != name {
- continue
- }
- if b, err = io.ReadAll(tr); err != nil {
- return nil, fmt.Errorf("%s: %v", name, err)
- }
- f.m[name] = b
- break
- }
- }
- return &fsFile{b: b, name: name}, nil
- }
- // Open attempts to open 'name'. If not successful then it tries to open 'name'
- // using 'search' paths. If still not found then Open may try to find the
- // resource in Assets.
- //
- // If all letters of the base name of 'name' are upper case then the preceding
- // steps may be extended by additionally looking for the lower case alternative
- // of 'name'.
- //
- // The caller is responsible to properly .Close any returned non-nil fs.Files
- // to avoid resource exhaustion.
- func Open(name string, search []string) (f fs.File, err error) {
- if debug {
- defer func() {
- trc("Open(%q) %q -> %p %v", name, search, f, err)
- }()
- }
- if f, err := os.Open(name); err == nil {
- return f, nil
- }
- const (
- mfBasesArea = "MFbases:"
- mfInputsArea = "MFinputs:"
- texFontsArea = "TeXfonts:"
- texFormatsArea = "TeXformats:"
- texInputsArea = "TeXinputs:"
- )
- var area, dir, base, lcBase, ext string
- switch {
- case strings.HasPrefix(name, texFontsArea):
- area = texFontsArea
- base = name[len(texFontsArea):]
- case strings.HasPrefix(name, texInputsArea):
- area = texInputsArea
- base = name[len(texInputsArea):]
- case strings.HasPrefix(name, texFormatsArea):
- area = texFormatsArea
- base = name[len(texFormatsArea):]
- case strings.HasPrefix(name, mfBasesArea):
- area = mfBasesArea
- base = name[len(mfBasesArea):]
- case strings.HasPrefix(name, mfInputsArea):
- area = mfInputsArea
- base = name[len(mfInputsArea):]
- default:
- dir, base = filepath.Split(name)
- }
- if ucBase := strings.ToUpper(base); ucBase == base {
- lcBase = strings.ToLower(base)
- }
- if f, err := os.Open(filepath.Join(dir, base)); err == nil {
- return f, nil
- }
- if lcBase != "" {
- if f, err := os.Open(filepath.Join(dir, lcBase)); err == nil {
- return f, nil
- }
- }
- for _, path := range search {
- if f, err := os.Open(filepath.Join(path, base)); err == nil {
- return f, nil
- }
- if lcBase != "" {
- if f, err := os.Open(filepath.Join(path, lcBase)); err == nil {
- return f, nil
- }
- }
- }
- if area != "" {
- base = strings.ToLower(base)
- }
- ext = filepath.Ext(base)
- switch area {
- case texFontsArea:
- switch ext {
- case ".tfm":
- dir = "fonts/cm/tfm/"
- default:
- panic(todo("%q", name))
- }
- case
- mfInputsArea,
- texFormatsArea,
- texInputsArea:
- dir = "lib/"
- case mfBasesArea:
- dir = "mfbases/"
- default:
- return nil, fmt.Errorf("%s: no such file (searched %v)", name, search)
- }
- fn := dir + base
- if debug {
- trc("Open(%q) trying assets: %q", name, dir)
- }
- return Assets.Open(fn)
- }
|