| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
2771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446 |
- // Copyright 2022 The Gc Authors. All rights reserved.
- // Use of this source code is governed by a BSD-style
- // license that can be found in the LICENSE file.
- package gc // import "modernc.org/gc/v3"
- import (
- "bytes"
- "fmt"
- "go/token"
- "path/filepath"
- "strings"
- "unicode"
- "unicode/utf8"
- "modernc.org/mathutil"
- mtoken "modernc.org/token"
- )
var (
	// Compile-time checks that *Token and *nonode satisfy Node.
	_ Node = (*Token)(nil)
	_ Node = (*nonode)(nil)

	// keywords maps the spelling of every keyword to its token.
	// identifierOrKeyword looks scanned words up here; a miss means the word
	// is an ordinary identifier.
	keywords = map[string]token.Token{
		"break":       BREAK,
		"case":        CASE,
		"chan":        CHAN,
		"const":       CONST,
		"continue":    CONTINUE,
		"default":     DEFAULT,
		"defer":       DEFER,
		"else":        ELSE,
		"fallthrough": FALLTHROUGH,
		"for":         FOR,
		"func":        FUNC,
		"go":          GO,
		"goto":        GOTO,
		"if":          IF,
		"import":      IMPORT,
		"interface":   INTERFACE,
		"map":         MAP,
		"package":     PACKAGE,
		"range":       RANGE,
		"return":      RETURN,
		"select":      SELECT,
		"struct":      STRUCT,
		"switch":      SWITCH,
		"type":        TYPE,
		"var":         VAR,
	}

	// lineCommentTag is the text lineInfo requires right after the two-byte
	// comment opener before it treats a comment as a line directive.
	lineCommentTag = []byte("line ")

	// znode is a shared, ready-made *nonode value.
	znode = &nonode{}
)
// nonode is a Node that carries neither a position nor any source text.
type nonode struct{}

// Position implements Node. It always reports the zero token.Position.
func (*nonode) Position() token.Position { return token.Position{} }

// Source implements Node. It always reports the empty string, whatever the
// value of full.
func (*nonode) Source(full bool) string { return "" }
// Token represents a lexeme, its position and its semantic value.
//
// A Token is a small handle: the text and position are not stored in it but
// looked up in the source it points to. The zero value has a nil source and
// is reported as invalid by IsValid.
type Token struct { // 16 bytes on 64 bit arch
	// source is the scanned file this token belongs to; nil in the zero value.
	source *source
	// ch is the token kind stored as int32; Ch converts it to token.Token.
	ch int32
	// index is the position of this token's record in source.toks.
	index int32
}

// Ch returns which token t represents
func (t Token) Ch() token.Token { return token.Token(t.ch) }
- // Source implements Node.
- func (t Token) Source(full bool) string {
- // trc("%10s %v: #%v sep %v, src %v, buf %v", tokSource(t.Ch()), t.Position(), t.index, t.source.toks[t.index].sep, t.source.toks[t.index].src, len(t.source.buf))
- sep := t.Sep()
- if !full && sep != "" {
- sep = " "
- }
- src := t.Src()
- if !full && strings.ContainsRune(src, '\n') {
- src = " "
- }
- // trc("%q %q -> %q %q", t.Sep(), t.Src(), sep, src)
- return sep + src
- }
- // Positions implements Node.
- func (t Token) Position() (r token.Position) {
- if t.source == nil {
- return r
- }
- s := t.source
- off := mathutil.MinInt32(int32(len(s.buf)), s.toks[t.index].src)
- return token.Position(s.file.PositionFor(mtoken.Pos(s.base+off), true))
- }
- // Prev returns the token preceding t or a zero value if no such token exists.
- func (t Token) Prev() (r Token) {
- if index := t.index - 1; index >= 0 {
- s := t.source
- return Token{source: s, ch: s.toks[index].ch, index: index}
- }
- return r
- }
- // Next returns the token following t or a zero value if no such token exists.
- func (t Token) Next() (r Token) {
- if index := t.index + 1; index < int32(len(t.source.toks)) {
- s := t.source
- return Token{source: s, ch: s.toks[index].ch, index: index}
- }
- return r
- }
- // Sep returns any separators, combined, preceding t.
- func (t Token) Sep() string {
- s := t.source
- if p, ok := s.sepPatches[t.index]; ok {
- return p
- }
- return string(s.buf[s.toks[t.index].sep:s.toks[t.index].src])
- }
- // SetSep sets t's separator.
- func (t Token) SetSep(s string) {
- src := t.source
- if src.sepPatches == nil {
- src.sepPatches = map[int32]string{}
- }
- src.sepPatches[t.index] = s
- }
- // Src returns t's source form.
- func (t Token) Src() string {
- s := t.source
- if p, ok := s.srcPatches[t.index]; ok {
- return p
- }
- if t.ch != int32(EOF) {
- next := t.source.off
- if t.index < int32(len(s.toks))-1 {
- next = s.toks[t.index+1].sep
- }
- return string(s.buf[s.toks[t.index].src:next])
- }
- return ""
- }
- // SetSrc sets t's source form.
- func (t Token) SetSrc(s string) {
- src := t.source
- if src.srcPatches == nil {
- src.srcPatches = map[int32]string{}
- }
- src.srcPatches[t.index] = s
- }
- // IsValid reports t is a valid token. Zero value reports false.
- func (t Token) IsValid() bool { return t.source != nil }
- type tok struct { // 12 bytes
- ch int32
- sep int32
- src int32
- }
- func (t *tok) token() token.Token { return token.Token(t.ch) }
- func (t *tok) position(s *source) (r token.Position) {
- off := mathutil.MinInt32(int32(len(s.buf)), t.src)
- return token.Position(s.file.PositionFor(mtoken.Pos(s.base+off), true))
- }
// source represents a single Go source file, editor text buffer etc.
type source struct {
	// buf holds the text being scanned.
	buf []byte
	// file resolves offsets to positions and records line starts and line
	// directives found while scanning.
	file *mtoken.File
	// name is the name the source was created with.
	name string
	// sepPatches holds separator overrides installed by Token.SetSep, keyed
	// by token index. It is nil until the first override.
	sepPatches map[int32]string
	// srcPatches holds source text overrides installed by Token.SetSrc,
	// keyed by token index. It is nil until the first override.
	srcPatches map[int32]string
	// toks holds one record per scanned token, in scan order.
	toks []tok
	// base is file.Base() as captured by newSource; it is added to buffer
	// offsets before they are handed to file.
	base int32
	// off is the scanner's current offset into buf.
	off int32
}
- // 'buf' becomes owned by the result and must not be modified afterwards.
- func newSource(name string, buf []byte) *source {
- file := mtoken.NewFile(name, len(buf))
- return &source{
- buf: buf,
- file: file,
- name: name,
- base: int32(file.Base()),
- }
- }
// ErrWithPosition is an error annotated with the source position it refers
// to.
type ErrWithPosition struct {
	pos token.Position
	err error
}

// String formats e as "position: error". When the position is not valid only
// the error is printed.
func (e ErrWithPosition) String() string {
	if e.pos.IsValid() {
		return fmt.Sprintf("%v: %v", e.pos, e.err)
	}

	return fmt.Sprintf("%v", e.err)
}

// errList is a list of positioned errors that itself implements error.
type errList []ErrWithPosition

// Err returns e as an error, or nil when the list is empty. Returning a
// literal nil avoids handing callers a non-nil interface that wraps an empty
// list.
func (e errList) Err() (r error) {
	if len(e) == 0 {
		return nil
	}

	return e
}

// Error implements error. It renders one entry per line.
//
// An entry is dropped when it immediately follows a kept entry with the same
// offset and the same message, unless its position has no line information
// (Line == 0), in which case it is always kept. The receiver is not modified.
func (e errList) Error() string {
	lines := make([]string, 0, len(e))
	var (
		havePrev bool
		prevOff  int
		prevMsg  string
	)
	for _, v := range e {
		msg := v.err.Error()
		if havePrev && v.pos.Line != 0 && v.pos.Offset == prevOff && msg == prevMsg {
			continue // Same error reported twice at the same place.
		}

		lines = append(lines, v.String())
		havePrev, prevOff, prevMsg = true, v.pos.Offset, msg
	}
	return strings.Join(lines, "\n")
}
- func (e *errList) err(pos token.Position, msg string, args ...interface{}) {
- if trcErrors {
- trc("FAIL "+msg, args...)
- }
- switch {
- case len(args) == 0:
- *e = append(*e, ErrWithPosition{pos, fmt.Errorf("%s", msg)})
- default:
- *e = append(*e, ErrWithPosition{pos, fmt.Errorf(msg, args...)})
- }
- }
// scanner turns the bytes of a source into tok records appended to
// source.toks, collecting errors on the way.
type scanner struct {
	*source
	// dir is the directory part of the source name; lineInfo joins relative
	// file names from line directives with it.
	dir string
	// errs collects the errors reported through err.
	errs errList
	// tok is the token record currently being assembled.
	tok tok
	// last is the kind of the previously scanned token; injectSemi consults
	// and resets it.
	last int32
	// errBudget is the number of errors that may still be reported; once it
	// is used up the next reported error closes the scanner.
	errBudget int
	c         byte // Lookahead byte.
	// eof is set when the lookahead has moved past the end of the buffer.
	eof bool
	// isClosed is set by close; scan returns false once it is set.
	isClosed bool
}
- func newScanner(name string, buf []byte) *scanner {
- dir, _ := filepath.Split(name)
- r := &scanner{source: newSource(name, buf), errBudget: 10, dir: dir}
- switch {
- case len(buf) == 0:
- r.eof = true
- default:
- r.c = buf[0]
- if r.c == '\n' {
- r.file.AddLine(int(r.base + r.off))
- }
- }
- return r
- }
// isDigit reports whether c is an ASCII decimal digit.
func isDigit(c byte) bool { return '0' <= c && c <= '9' }

// isHexDigit reports whether c is an ASCII hexadecimal digit of either case.
func isHexDigit(c byte) bool {
	switch {
	case '0' <= c && c <= '9', 'a' <= c && c <= 'f', 'A' <= c && c <= 'F':
		return true
	}
	return false
}

// isIDNext reports whether c may continue an ASCII identifier.
func isIDNext(c byte) bool { return isDigit(c) || isIDFirst(c) }

// isOctalDigit reports whether c is an ASCII octal digit.
func isOctalDigit(c byte) bool { return '0' <= c && c <= '7' }

// isIDFirst reports whether c may start an ASCII identifier: a letter of
// either case or an underscore.
func isIDFirst(c byte) bool {
	if c == '_' {
		return true
	}

	// Setting bit 0x20 maps 'A'..'Z' onto 'a'..'z' and leaves 'a'..'z' as is;
	// no other byte lands in that range.
	lower := c | 0x20
	return 'a' <= lower && lower <= 'z'
}
- func (s *scanner) position() token.Position {
- return token.Position(s.source.file.PositionFor(mtoken.Pos(s.base+s.off), true))
- }
- func (s *scanner) pos(off int32) token.Position {
- return token.Position(s.file.PositionFor(mtoken.Pos(s.base+off), true))
- }
- func (s *scanner) token() Token {
- return Token{source: s.source, ch: s.tok.ch, index: int32(len(s.toks) - 1)}
- }
- func (s *scanner) err(off int32, msg string, args ...interface{}) {
- if s.errBudget <= 0 {
- s.close()
- return
- }
- s.errBudget--
- if n := int32(len(s.buf)); off >= n {
- off = n
- }
- s.errs.err(s.pos(off), msg, args...)
- }
- func (s *scanner) close() {
- if s.isClosed {
- return
- }
- s.tok.ch = int32(ILLEGAL)
- s.eof = true
- s.isClosed = true
- }
- func (s *scanner) next() {
- if s.eof {
- return
- }
- s.off++
- if int(s.off) == len(s.buf) {
- s.c = 0
- s.eof = true
- return
- }
- s.c = s.buf[s.off]
- if s.c == '\n' {
- s.file.AddLine(int(s.base + s.off))
- }
- }
// nextN advances the lookahead by n bytes; rune uses it to step over a
// multi-byte UTF-8 sequence.
//
// When the n bytes at the current offset are the last ones in the buffer, the
// lookahead is set to 0 and eof is set, but s.off is left where it was. This
// differs from next, which increments s.off before detecting the end of the
// buffer.
//
// NOTE(review): because s.off is not advanced in the end-of-buffer case, code
// that slices s.buf up to s.off afterwards appears to exclude a trailing
// multi-byte rune - confirm whether that is intended.
func (s *scanner) nextN(n int) {
	if int(s.off) == len(s.buf)-n {
		s.c = 0
		s.eof = true
		return
	}

	s.off += int32(n)
	s.c = s.buf[s.off]
	if s.c == '\n' {
		// Keep the file's line table in step, as next does.
		s.file.AddLine(int(s.base + s.off))
	}
}
- func (s *scanner) scan() (r bool) {
- if s.isClosed {
- return false
- }
- s.last = s.tok.ch
- s.tok.sep = s.off
- s.tok.ch = -1
- for {
- if r = s.scan0(); !r || s.tok.ch >= 0 {
- s.toks = append(s.toks, s.tok)
- // trc("", dump(s.token()))
- return r
- }
- }
- }
- func (s *scanner) scan0() (r bool) {
- s.tok.src = mathutil.MinInt32(s.off, int32(len(s.buf)))
- switch s.c {
- case ' ', '\t', '\r', '\n':
- // White space, formed from spaces (U+0020), horizontal tabs (U+0009), carriage
- // returns (U+000D), and newlines (U+000A), is ignored except as it separates
- // tokens that would otherwise combine into a single token.
- if s.c == '\n' && s.injectSemi() {
- return true
- }
- s.next()
- return true
- case '/':
- off := s.off
- s.next()
- switch s.c {
- case '=':
- s.next()
- s.tok.ch = int32(QUO_ASSIGN)
- case '/':
- // Line comments start with the character sequence // and stop at the end of
- // the line.
- s.next()
- s.lineComment(off)
- return true
- case '*':
- // General comments start with the character sequence /* and stop with the
- // first subsequent character sequence */.
- s.next()
- s.generalComment(off)
- return true
- default:
- s.tok.ch = int32(QUO)
- }
- case '(':
- s.tok.ch = int32(LPAREN)
- s.next()
- case ')':
- s.tok.ch = int32(RPAREN)
- s.next()
- case '[':
- s.tok.ch = int32(LBRACK)
- s.next()
- case ']':
- s.tok.ch = int32(RBRACK)
- s.next()
- case '{':
- s.tok.ch = int32(LBRACE)
- s.next()
- case '}':
- s.tok.ch = int32(RBRACE)
- s.next()
- case ',':
- s.tok.ch = int32(COMMA)
- s.next()
- case ';':
- s.tok.ch = int32(SEMICOLON)
- s.next()
- case '~':
- s.tok.ch = int32(TILDE)
- s.next()
- case '"':
- off := s.off
- s.next()
- s.stringLiteral(off)
- case '\'':
- off := s.off
- s.next()
- s.runeLiteral(off)
- case '`':
- s.next()
- for {
- switch {
- case s.c == '`':
- s.next()
- s.tok.ch = int32(STRING)
- return true
- case s.eof:
- s.err(s.off, "raw string literal not terminated")
- s.tok.ch = int32(STRING)
- return true
- case s.c == 0:
- panic(todo("%v: %#U", s.position(), s.c))
- default:
- s.next()
- }
- }
- case '.':
- s.next()
- off := s.off
- if isDigit(s.c) {
- s.dot(false, true)
- return true
- }
- if s.c != '.' {
- s.tok.ch = int32(PERIOD)
- return true
- }
- s.next()
- if s.c != '.' {
- s.off = off
- s.c = '.'
- s.tok.ch = int32(PERIOD)
- return true
- }
- s.next()
- s.tok.ch = int32(ELLIPSIS)
- return true
- case '%':
- s.next()
- switch s.c {
- case '=':
- s.next()
- s.tok.ch = int32(REM_ASSIGN)
- default:
- s.tok.ch = int32(REM)
- }
- case '*':
- s.next()
- switch s.c {
- case '=':
- s.next()
- s.tok.ch = int32(MUL_ASSIGN)
- default:
- s.tok.ch = int32(MUL)
- }
- case '^':
- s.next()
- switch s.c {
- case '=':
- s.next()
- s.tok.ch = int32(XOR_ASSIGN)
- default:
- s.tok.ch = int32(XOR)
- }
- case '+':
- s.next()
- switch s.c {
- case '+':
- s.next()
- s.tok.ch = int32(INC)
- case '=':
- s.next()
- s.tok.ch = int32(ADD_ASSIGN)
- default:
- s.tok.ch = int32(ADD)
- }
- case '-':
- s.next()
- switch s.c {
- case '-':
- s.next()
- s.tok.ch = int32(DEC)
- case '=':
- s.next()
- s.tok.ch = int32(SUB_ASSIGN)
- default:
- s.tok.ch = int32(SUB)
- }
- case ':':
- s.next()
- switch {
- case s.c == '=':
- s.next()
- s.tok.ch = int32(DEFINE)
- default:
- s.tok.ch = int32(COLON)
- }
- case '=':
- s.next()
- switch {
- case s.c == '=':
- s.next()
- s.tok.ch = int32(EQL)
- default:
- s.tok.ch = int32(ASSIGN)
- }
- case '!':
- s.next()
- switch {
- case s.c == '=':
- s.next()
- s.tok.ch = int32(NEQ)
- default:
- s.tok.ch = int32(NOT)
- }
- case '>':
- s.next()
- switch s.c {
- case '=':
- s.next()
- s.tok.ch = int32(GEQ)
- case '>':
- s.next()
- switch s.c {
- case '=':
- s.next()
- s.tok.ch = int32(SHR_ASSIGN)
- default:
- s.tok.ch = int32(SHR)
- }
- default:
- s.tok.ch = int32(GTR)
- }
- case '<':
- s.next()
- switch s.c {
- case '=':
- s.next()
- s.tok.ch = int32(LEQ)
- case '<':
- s.next()
- switch s.c {
- case '=':
- s.next()
- s.tok.ch = int32(SHL_ASSIGN)
- default:
- s.tok.ch = int32(SHL)
- }
- case '-':
- s.next()
- s.tok.ch = int32(ARROW)
- default:
- s.tok.ch = int32(LSS)
- }
- case '|':
- s.next()
- switch s.c {
- case '|':
- s.next()
- s.tok.ch = int32(LOR)
- case '=':
- s.next()
- s.tok.ch = int32(OR_ASSIGN)
- default:
- s.tok.ch = int32(OR)
- }
- case '&':
- s.next()
- switch s.c {
- case '&':
- s.next()
- s.tok.ch = int32(LAND)
- case '^':
- s.next()
- switch s.c {
- case '=':
- s.next()
- s.tok.ch = int32(AND_NOT_ASSIGN)
- default:
- s.tok.ch = int32(AND_NOT)
- }
- case '=':
- s.next()
- s.tok.ch = int32(AND_ASSIGN)
- default:
- s.tok.ch = int32(AND)
- }
- default:
- switch {
- case isIDFirst(s.c):
- s.next()
- s.identifierOrKeyword()
- case isDigit(s.c):
- s.numericLiteral()
- case s.c >= 0x80:
- off := s.off
- switch r := s.rune(); {
- case unicode.IsLetter(r):
- s.identifierOrKeyword()
- case r == 0xfeff:
- if off == 0 { // Ignore BOM, but only at buffer start.
- return true
- }
- s.err(off, "illegal byte order mark")
- s.tok.ch = int32(ILLEGAL)
- default:
- s.err(s.off, "illegal character %#U", r)
- s.tok.ch = int32(ILLEGAL)
- }
- case s.eof:
- if s.injectSemi() {
- return true
- }
- s.close()
- s.tok.ch = int32(EOF)
- s.tok.sep = mathutil.MinInt32(s.tok.sep, s.tok.src)
- return false
- // case s.c == 0:
- // panic(todo("%v: %#U", s.position(), s.c))
- default:
- s.err(s.off, "illegal character %#U", s.c)
- s.next()
- s.tok.ch = int32(ILLEGAL)
- }
- }
- return true
- }
- func (s *scanner) runeLiteral(off int32) {
- // Leading ' consumed.
- ok := 0
- s.tok.ch = int32(CHAR)
- expOff := int32(-1)
- if s.eof {
- s.err(off, "rune literal not terminated")
- return
- }
- for {
- switch s.c {
- case '\\':
- ok++
- s.next()
- switch s.c {
- case '\'', '\\', 'a', 'b', 'f', 'n', 'r', 't', 'v':
- s.next()
- case 'x', 'X':
- s.next()
- for i := 0; i < 2; i++ {
- if s.c == '\'' {
- if i != 2 {
- s.err(s.off, "illegal character %#U in escape sequence", s.c)
- }
- s.next()
- return
- }
- if !isHexDigit(s.c) {
- s.err(s.off, "illegal character %#U in escape sequence", s.c)
- break
- }
- s.next()
- }
- case 'u':
- s.u(4)
- case 'U':
- s.u(8)
- default:
- switch {
- case s.eof:
- s.err(s.base+s.off, "escape sequence not terminated")
- return
- case isOctalDigit(s.c):
- for i := 0; i < 3; i++ {
- s.next()
- if s.c == '\'' {
- if i != 2 {
- s.err(s.off, "illegal character %#U in escape sequence", s.c)
- }
- s.next()
- return
- }
- if !isOctalDigit(s.c) {
- s.err(s.off, "illegal character %#U in escape sequence", s.c)
- break
- }
- }
- default:
- s.err(s.off, "unknown escape sequence")
- }
- }
- case '\'':
- s.next()
- if ok != 1 {
- s.err(off, "illegal rune literal")
- }
- return
- case '\t':
- s.next()
- ok++
- default:
- switch {
- case s.eof:
- switch {
- case ok != 0:
- s.err(expOff, "rune literal not terminated")
- default:
- s.err(s.base+s.off, "rune literal not terminated")
- }
- return
- case s.c == 0:
- panic(todo("%v: %#U", s.position(), s.c))
- case s.c < ' ':
- ok++
- s.err(s.off, "non-printable character: %#U", s.c)
- s.next()
- case s.c >= 0x80:
- ok++
- off := s.off
- if c := s.rune(); c == 0xfeff {
- s.err(off, "illegal byte order mark")
- }
- default:
- ok++
- s.next()
- }
- }
- if ok != 0 && expOff < 0 {
- expOff = s.off
- if s.eof {
- expOff++
- }
- }
- }
- }
- func (s *scanner) stringLiteral(off int32) {
- // Leadind " consumed.
- s.tok.ch = int32(STRING)
- for {
- switch {
- case s.c == '"':
- s.next()
- return
- case s.c == '\\':
- s.next()
- switch s.c {
- case '"', '\\', 'a', 'b', 'f', 'n', 'r', 't', 'v':
- s.next()
- continue
- case 'x', 'X':
- s.next()
- if !isHexDigit(s.c) {
- panic(todo("%v: %#U", s.position(), s.c))
- }
- s.next()
- if !isHexDigit(s.c) {
- panic(todo("%v: %#U", s.position(), s.c))
- }
- s.next()
- continue
- case 'u':
- s.u(4)
- continue
- case 'U':
- s.u(8)
- continue
- default:
- switch {
- case isOctalDigit(s.c):
- s.next()
- if isOctalDigit(s.c) {
- s.next()
- }
- if isOctalDigit(s.c) {
- s.next()
- }
- continue
- default:
- s.err(off-1, "unknown escape sequence")
- }
- }
- case s.c == '\n':
- fallthrough
- case s.eof:
- s.err(off, "string literal not terminated")
- return
- case s.c == 0:
- s.err(s.off, "illegal character NUL")
- }
- switch {
- case s.c >= 0x80:
- off := s.off
- if s.rune() == 0xfeff {
- s.err(off, "illegal byte order mark")
- }
- continue
- }
- s.next()
- }
- }
- func (s *scanner) u(n int) (r rune) {
- // Leading u/U not consumed.
- s.next()
- off := s.off
- for i := 0; i < n; i++ {
- switch {
- case isHexDigit(s.c):
- var n rune
- switch {
- case s.c >= '0' && s.c <= '9':
- n = rune(s.c) - '0'
- case s.c >= 'a' && s.c <= 'f':
- n = rune(s.c) - 'a' + 10
- case s.c >= 'A' && s.c <= 'F':
- n = rune(s.c) - 'A' + 10
- }
- r = 16*r + n
- default:
- switch {
- case s.eof:
- s.err(s.base+s.off, "escape sequence not terminated")
- default:
- s.err(s.off, "illegal character %#U in escape sequence", s.c)
- }
- return r
- }
- s.next()
- }
- if r < 0 || r > unicode.MaxRune || r >= 0xd800 && r <= 0xdfff {
- s.err(off-1, "escape sequence is invalid Unicode code point")
- }
- return r
- }
- func (s *scanner) identifierOrKeyword() {
- out:
- for {
- switch {
- case isIDNext(s.c):
- s.next()
- case s.c >= 0x80:
- off := s.off
- c := s.c
- switch r := s.rune(); {
- case unicode.IsLetter(r) || unicode.IsDigit(r):
- // already consumed
- default:
- s.off = off
- s.c = c
- break out
- }
- case s.eof:
- break out
- case s.c == 0:
- s.err(s.off, "illegal character NUL")
- break out
- default:
- break out
- }
- }
- if s.tok.ch = int32(keywords[string(s.buf[s.tok.src:s.off])]); s.tok.ch == 0 {
- s.tok.ch = int32(IDENT)
- }
- }
// numericLiteral scans a numeric literal that starts with a decimal digit and
// sets the token kind to INT, FLOAT or IMAG. Problems are reported through
// s.err.
//
// The first switch handles the integer part. A leading 0 selects the binary,
// octal or hexadecimal form by its prefix letter; binary and 0o-octal
// literals are completed by their own functions. The second switch handles
// what may follow the integer part: a fraction, an exponent or the imaginary
// suffix.
func (s *scanner) numericLiteral() {
	// Leading decimal digit not consumed.
	var hasHexMantissa, needFrac bool
more:
	switch s.c {
	case '0':
		s.next()
		switch s.c {
		case '.':
			// nop
		case 'b', 'B':
			s.next()
			s.binaryLiteral()
			return
		case 'e', 'E':
			s.exponent()
			s.tok.ch = int32(FLOAT)
			return
		case 'p', 'P':
			s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c)
			s.exponent()
			s.tok.ch = int32(FLOAT)
			return
		case 'o', 'O':
			s.next()
			s.octalLiteral()
			return
		case 'x', 'X':
			hasHexMantissa = true
			needFrac = true
			s.tok.ch = int32(INT)
			s.next()
			if s.c == '.' {
				// No digits before the radix point, so dot must find some
				// after it.
				s.next()
				s.dot(hasHexMantissa, needFrac)
				return
			}

			if s.hexadecimals() == 0 {
				s.err(s.base+s.off, "hexadecimal literal has no digits")
				return
			}

			needFrac = false
		case 'i':
			s.next()
			s.tok.ch = int32(IMAG)
			return
		default:
			// A 0 followed by further digits. Digits 8 and 9 are remembered
			// but only reported if this turns out to be an integer.
			invalidOff := int32(-1)
			var invalidDigit byte
			for {
				if s.c == '_' {
					for n := 0; s.c == '_'; n++ {
						if n == 1 {
							s.err(s.off, "'_' must separate successive digits")
						}
						s.next()
					}
					if !isDigit(s.c) {
						s.err(s.off-1, "'_' must separate successive digits")
					}
				}
				if isOctalDigit(s.c) {
					s.next()
					continue
				}

				if isDigit(s.c) {
					if invalidOff < 0 {
						invalidOff = s.off
						invalidDigit = s.c
					}
					s.next()
					continue
				}

				break
			}
			// Leave the labeled switch so the suffix handling below runs.
			switch s.c {
			case '.', 'e', 'E', 'i':
				break more
			}
			if isDigit(s.c) {
				break more
			}
			if invalidOff > 0 {
				s.err(invalidOff, "invalid digit '%c' in octal literal", invalidDigit)
			}
			s.tok.ch = int32(INT)
			return
		}
	default:
		s.decimals()
	}
	switch s.c {
	case '.':
		s.next()
		s.dot(hasHexMantissa, needFrac)
	case 'p', 'P':
		if !hasHexMantissa {
			s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c)
		}
		fallthrough
	case 'e', 'E':
		s.exponent()
		if s.c == 'i' {
			s.next()
			s.tok.ch = int32(IMAG)
			return
		}

		s.tok.ch = int32(FLOAT)
	case 'i':
		s.next()
		s.tok.ch = int32(IMAG)
	default:
		s.tok.ch = int32(INT)
	}
}
// octalLiteral scans the digits of an octal literal after its 0o prefix. The
// token kind starts as INT. It becomes FLOAT when a radix point or an
// exponent is met, each of which is reported as an error, and IMAG when the
// literal ends in 'i'.
//
// ok records that at least one octal digit was seen. The first digit 8 or 9
// is remembered and reported once the literal ends.
func (s *scanner) octalLiteral() {
	// Leading 0o consumed.
	ok := false
	invalidOff := int32(-1)
	var invalidDigit byte
	s.tok.ch = int32(INT)
	for {
		// A second consecutive underscore is an error.
		for n := 0; s.c == '_'; n++ {
			if n == 1 {
				s.err(s.off, "'_' must separate successive digits")
			}
			s.next()
		}
		switch s.c {
		case '0', '1', '2', '3', '4', '5', '6', '7':
			s.next()
			ok = true
		case '8', '9':
			if invalidOff < 0 {
				invalidOff = s.off
				invalidDigit = s.c
			}
			s.next()
		case '.':
			s.tok.ch = int32(FLOAT)
			s.err(s.off, "invalid radix point in octal literal")
			s.next()
		case 'e', 'E':
			s.tok.ch = int32(FLOAT)
			s.err(s.off, "'%c' exponent requires decimal mantissa", s.c)
			s.exponent()
		case 'p', 'P':
			s.tok.ch = int32(FLOAT)
			s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c)
			s.exponent()
		default:
			// End of the literal: report what was deferred.
			switch {
			case !ok:
				s.err(s.base+s.off, "octal literal has no digits")
			case invalidOff > 0:
				s.err(invalidOff, "invalid digit '%c' in octal literal", invalidDigit)
			}
			if s.c == 'i' {
				s.next()
				s.tok.ch = int32(IMAG)
			}
			return
		}
	}
}
// binaryLiteral scans the digits of a binary literal after its 0b prefix. The
// token kind starts as INT. It becomes FLOAT when a radix point or an
// exponent is met, each of which is reported as an error, and IMAG when the
// literal ends in 'i'.
//
// ok records that at least one binary digit was seen. The first decimal digit
// other than 0 or 1 is remembered and reported once the literal ends.
func (s *scanner) binaryLiteral() {
	// Leading 0b consumed.
	ok := false
	invalidOff := int32(-1)
	var invalidDigit byte
	s.tok.ch = int32(INT)
	for {
		// A second consecutive underscore is an error.
		for n := 0; s.c == '_'; n++ {
			if n == 1 {
				s.err(s.off, "'_' must separate successive digits")
			}
			s.next()
		}
		switch s.c {
		case '0', '1':
			s.next()
			ok = true
		case '.':
			s.tok.ch = int32(FLOAT)
			s.err(s.off, "invalid radix point in binary literal")
			s.next()
		case 'e', 'E':
			s.tok.ch = int32(FLOAT)
			s.err(s.off, "'%c' exponent requires decimal mantissa", s.c)
			s.exponent()
		case 'p', 'P':
			s.tok.ch = int32(FLOAT)
			s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c)
			s.exponent()
		default:
			if isDigit(s.c) {
				// Digits 2-9: consumed as part of the literal, reported later.
				if invalidOff < 0 {
					invalidOff = s.off
					invalidDigit = s.c
				}
				s.next()
				continue
			}

			// End of the literal: report what was deferred.
			switch {
			case !ok:
				s.err(s.base+s.off, "binary literal has no digits")
			case invalidOff > 0:
				s.err(invalidOff, "invalid digit '%c' in binary literal", invalidDigit)
			}
			if s.c == 'i' {
				s.next()
				s.tok.ch = int32(IMAG)
			}
			return
		}
	}
}
- func (s *scanner) generalComment(off int32) (injectSemi bool) {
- // Leading /* consumed
- off0 := s.off - 2
- var nl bool
- for {
- switch {
- case s.c == '*':
- s.next()
- switch s.c {
- case '/':
- s.lineInfo(off0, s.off+1)
- s.next()
- if nl {
- return s.injectSemi()
- }
- return false
- }
- case s.c == '\n':
- nl = true
- s.next()
- case s.eof:
- s.tok.ch = 0
- s.err(off, "comment not terminated")
- return true
- case s.c == 0:
- panic(todo("%v: %#U", s.position(), s.c))
- default:
- s.next()
- }
- }
- }
// lineComment scans the rest of a line comment; off is the offset passed by
// the caller and is used to position the byte order mark error.
//
// At the terminating newline the comment, including that newline, is handed
// to lineInfo so that a line directive in it is honored. The result reports
// whether injectSemi produced a semicolon. A byte order mark inside the
// comment is reported as an error. A NUL byte that is not the EOF marker ends
// the scan without being consumed.
func (s *scanner) lineComment(off int32) (injectSemi bool) {
	// Leading // consumed
	off0 := s.off - 2
	for {
		switch {
		case s.c == '\n':
			s.lineInfo(off0, s.off+1)
			// injectSemi consumes the newline itself when it injects.
			if s.injectSemi() {
				return true
			}

			s.next()
			return false
		case s.c >= 0x80:
			if c := s.rune(); c == 0xfeff {
				s.err(off+2, "illegal byte order mark")
			}
		case s.eof:
			// NOTE(review): the offset is bumped past the point where eof
			// was detected; scan0 clamps the token start to the buffer
			// length - confirm the intent of this adjustment.
			s.off++
			if s.injectSemi() {
				return true
			}

			return false
		case s.c == 0:
			return false
		default:
			s.next()
		}
	}
}
// lineInfo inspects the comment occupying s.buf[off:next] and, if it is a
// line directive, registers the directive with the file.
//
// next lies just past the comment's terminator: the newline of a line
// comment or the closing */ of a general comment. A comment qualifies when
// lineCommentTag follows its two-byte opener. A line comment must in addition
// start at the beginning of the buffer or directly after a newline or
// carriage return.
//
// The directive text is parsed from its end: an optional ":col" preceded by
// ":line", and before those the file name. Malformed numbers make the comment
// an ordinary one.
func (s *scanner) lineInfo(off, next int32) {
	if off != 0 && s.buf[off+1] != '*' && s.buf[off-1] != '\n' && s.buf[off-1] != '\r' {
		return
	}

	str := s.buf[off:next]
	if !bytes.HasPrefix(str[len("//"):], lineCommentTag) {
		return
	}

	// Drop the terminator and then the opener.
	switch {
	case str[1] == '*':
		str = str[:len(str)-len("*/")]
	default:
		str = str[:len(str)-len("\n")]
	}
	str = str[len("//"):]

	// The last number is mandatory.
	str, ln, ok := s.lineInfoNum(str[len("line "):])
	col := 0
	if ok == liBadNum || ok == liNoNum {
		return
	}

	// If a second number precedes it, the first one parsed was the column and
	// this one is the line.
	hasCol := false
	var n int
	if str, n, ok = s.lineInfoNum(str); ok == liBadNum {
		return
	}

	if ok != liNoNum {
		col = ln
		ln = n
		hasCol = true
	}

	fn := strings.TrimSpace(string(str))
	switch {
	case fn == "" && hasCol:
		// No file name but a column: keep the current file name.
		fn = s.pos(off).Filename
	case fn != "":
		// Relative names are taken relative to the scanned file's directory.
		fn = filepath.Clean(fn)
		if !filepath.IsAbs(fn) {
			fn = filepath.Join(s.dir, fn)
		}
	}
	// trc("set %v %q %v %v", next, fn, ln, col)
	s.file.AddLineColumnInfo(int(next), fn, ln, col)
}
- const (
- liNoNum = iota
- liBadNum
- liOK
- )
- func (s *scanner) lineInfoNum(str []byte) (_ []byte, n, r int) {
- // trc("==== %q", str)
- x := len(str) - 1
- if x < 0 || !isDigit(str[x]) {
- return str, 0, liNoNum
- }
- mul := 1
- for x > 0 && isDigit(str[x]) {
- n += mul * (int(str[x]) - '0')
- mul *= 10
- x--
- if n < 0 {
- return str, 0, liBadNum
- }
- }
- if x < 0 || str[x] != ':' {
- return str, 0, liBadNum
- }
- // trc("---- %q %v %v", str[:x], n, liOK)
- return str[:x], n, liOK
- }
- func (s *scanner) rune() rune {
- switch r, sz := utf8.DecodeRune(s.buf[s.off:]); {
- case r == utf8.RuneError && sz == 0:
- panic(todo("%v: %#U", s.position(), s.c))
- case r == utf8.RuneError && sz == 1:
- s.err(s.off, "illegal UTF-8 encoding")
- s.next()
- return r
- default:
- s.nextN(sz)
- return r
- }
- }
// dot scans what follows the radix point of a floating-point literal and sets
// the token kind to FLOAT, or to IMAG when the literal ends in 'i'.
//
// hasHexMantissa selects hexadecimal fraction digits and makes a 'p' exponent
// mandatory. needFrac states that at least one fraction digit is required.
func (s *scanner) dot(hasHexMantissa, needFrac bool) {
	// '.' already consumed
	switch {
	case hasHexMantissa:
		if s.hexadecimals() == 0 && needFrac {
			s.err(s.off, "hexadecimal literal has no digits")
		}
		switch s.c {
		case 'p', 'P':
			// ok
		default:
			s.err(s.off, "hexadecimal mantissa requires a 'p' exponent")
		}
	default:
		// NOTE(review): the visible callers pass needFrac=true for a decimal
		// mantissa only when a digit follows the point, so this panic looks
		// unreachable - confirm.
		if s.decimals() == 0 && needFrac {
			panic(todo("%v: %#U", s.position(), s.c))
		}
	}
	switch s.c {
	case 'p', 'P':
		if !hasHexMantissa {
			s.err(s.off, "'%c' exponent requires hexadecimal mantissa", s.c)
		}
		fallthrough
	case 'e', 'E':
		s.exponent()
		if s.c == 'i' {
			s.next()
			s.tok.ch = int32(IMAG)
			return
		}

		s.tok.ch = int32(FLOAT)
	case 'i':
		s.next()
		s.tok.ch = int32(IMAG)
	default:
		s.tok.ch = int32(FLOAT)
	}
}
- func (s *scanner) exponent() {
- // Leanding e or E not consumed.
- s.next()
- switch s.c {
- case '+', '-':
- s.next()
- }
- if !isDigit(s.c) {
- s.err(s.base+s.off, "exponent has no digits")
- return
- }
- s.decimals()
- }
- func (s *scanner) decimals() (r int) {
- first := true
- for {
- switch {
- case isDigit(s.c):
- first = false
- s.next()
- r++
- case s.c == '_':
- for n := 0; s.c == '_'; n++ {
- if first || n == 1 {
- s.err(s.off, "'_' must separate successive digits")
- }
- s.next()
- }
- if !isDigit(s.c) {
- s.err(s.off-1, "'_' must separate successive digits")
- }
- default:
- return r
- }
- }
- }
- func (s *scanner) hexadecimals() (r int) {
- for {
- switch {
- case isHexDigit(s.c):
- s.next()
- r++
- case s.c == '_':
- for n := 0; s.c == '_'; n++ {
- if n == 1 {
- s.err(s.off, "'_' must separate successive digits")
- }
- s.next()
- }
- if !isHexDigit(s.c) {
- s.err(s.off-1, "'_' must separate successive digits")
- }
- default:
- return r
- }
- }
- }
- // When the input is broken into tokens, a semicolon is automatically inserted
- // into the token stream immediately after a line's final token if that token
- // is
- //
- // - an identifier
- // - an integer, floating-point, imaginary, rune, or string literal
- // - one of the keywords break, continue, fallthrough, or return
- // - one of the operators and punctuation ++, --, ), ], or }
- func (s *scanner) injectSemi() bool {
- switch token.Token(s.last) {
- case
- IDENT, INT, FLOAT, IMAG, CHAR, STRING,
- BREAK, CONTINUE, FALLTHROUGH, RETURN,
- INC, DEC, RPAREN, RBRACK, RBRACE:
- s.tok.ch = int32(SEMICOLON)
- s.last = 0
- if s.c == '\n' {
- s.next()
- }
- return true
- }
- s.last = 0
- return false
- }
|