scanf.go 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748
  1. // Copyright 2020 The Libc Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. //go:build !(linux && (amd64 || arm64 || loong64 || ppc64le || s390x || riscv64 || 386 || arm))
  5. package libc // import "modernc.org/libc"
  6. import (
  7. "io"
  8. "strconv"
  9. "strings"
  10. "unsafe"
  11. )
  12. // The format string consists of a sequence of directives which describe how to
  13. // process the sequence of input characters. If processing of a directive
  14. // fails, no further input is read, and scanf() returns. A "failure" can
  15. // be either of the following: input failure, meaning that input characters
  16. // were unavailable, or matching failure, meaning that the input was
  17. // inappropriate.
  18. func scanf(r io.ByteScanner, format, args uintptr) (nvalues int32) {
  19. // var src []byte //TODO-
  20. var ok bool
  21. out:
  22. for {
  23. c := *(*byte)(unsafe.Pointer(format))
  24. // src = append(src, c) //TODO-
  25. switch c {
  26. case '%':
  27. var n int
  28. var match bool
  29. format, n, match = scanfConversion(r, format, &args)
  30. if !match {
  31. break out
  32. }
  33. nvalues += int32(n)
  34. ok = true
  35. case 0:
  36. break out
  37. case ' ', '\t', '\n', '\r', '\v', '\f':
  38. format = skipWhiteSpace(format)
  39. ok = true
  40. next:
  41. for {
  42. c, err := r.ReadByte()
  43. if err != nil {
  44. break out
  45. }
  46. switch c {
  47. case ' ', '\t', '\n', '\r', '\v', '\f':
  48. // nop
  49. default:
  50. r.UnreadByte()
  51. break next
  52. }
  53. }
  54. default:
  55. c2, err := r.ReadByte()
  56. if err != nil {
  57. break out
  58. }
  59. if c2 != c {
  60. r.UnreadByte()
  61. break out
  62. }
  63. format++
  64. ok = true
  65. }
  66. }
  67. if ok {
  68. return nvalues
  69. }
  70. return -1 // stdio.EOF but not defined for windows
  71. }
  72. func scanfConversion(r io.ByteScanner, format uintptr, args *uintptr) (_ uintptr, nvalues int, match bool) {
  73. format++ // '%'
  74. // Each conversion specification in format begins with either the character '%'
  75. // or the character sequence "%n$" (see below for the distinction) followed by:
  76. mod := 0
  77. width := -1
  78. discard := false
  79. flags:
  80. for {
  81. switch c := *(*byte)(unsafe.Pointer(format)); c {
  82. case '*':
  83. // An optional '*' assignment-suppression character: scanf() reads input as
  84. // directed by the conversion specification, but discards the input. No
  85. // corresponding pointer argument is re‐ quired, and this specification is not
  86. // included in the count of successful assignments returned by scanf().
  87. format++
  88. discard = true
  89. case '\'':
  90. // For decimal conversions, an optional quote character ('). This specifies
  91. // that the input number may include thousands' separators as defined by the
  92. // LC_NUMERIC category of the current locale. (See setlocale(3).) The quote
  93. // character may precede or follow the '*' assignment-suppression character.
  94. format++
  95. panic(todo(""))
  96. case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  97. // An optional decimal integer which specifies the maximum field width.
  98. // Reading of characters stops either when this maximum is reached or when a
  99. // nonmatching character is found, whichever happens first. Most conversions
  100. // discard initial white space characters (the exceptions are noted below), and
  101. // these discarded characters don't count toward the maximum field width.
  102. // String input conversions store a terminating null byte ('\0') to mark the
  103. // end of the input; the maximum field width does not include this terminator.
  104. width = 0
  105. num:
  106. for {
  107. var digit int
  108. switch c := *(*byte)(unsafe.Pointer(format)); {
  109. default:
  110. break num
  111. case c >= '0' && c <= '9':
  112. format++
  113. digit = int(c) - '0'
  114. }
  115. width0 := width
  116. width = 10*width + digit
  117. if width < width0 {
  118. panic(todo(""))
  119. }
  120. }
  121. case 'h', 'j', 'l', 'L', 'q', 't', 'z':
  122. format, mod = parseLengthModifier(format)
  123. default:
  124. break flags
  125. }
  126. }
  127. // A conversion specifier that specifies the type of input conversion to be
  128. // performed.
  129. switch c := *(*byte)(unsafe.Pointer(format)); c {
  130. case '%':
  131. // Matches a literal '%'. That is, %% in the format string matches a single
  132. // input '%' character. No conversion is done (but initial white space
  133. // characters are discarded), and assign‐ ment does not occur.
  134. format++
  135. skipReaderWhiteSpace(r)
  136. c, err := r.ReadByte()
  137. if err != nil {
  138. return format, -1, false
  139. }
  140. if c == '%' {
  141. return format, 1, true
  142. }
  143. r.UnreadByte()
  144. return format, 0, false
  145. case 'd':
  146. // Matches an optionally signed decimal integer; the next pointer must be a
  147. // pointer to int.
  148. format++
  149. skipReaderWhiteSpace(r)
  150. var digit, n uint64
  151. allowSign := true
  152. neg := false
  153. dec:
  154. for ; width != 0; width-- {
  155. c, err := r.ReadByte()
  156. if err != nil {
  157. if match {
  158. break dec
  159. }
  160. return 0, 0, false
  161. }
  162. if allowSign {
  163. switch c {
  164. case '-':
  165. allowSign = false
  166. neg = true
  167. continue
  168. case '+':
  169. allowSign = false
  170. continue
  171. }
  172. }
  173. switch {
  174. case c >= '0' && c <= '9':
  175. digit = uint64(c) - '0'
  176. default:
  177. r.UnreadByte()
  178. break dec
  179. }
  180. match = true
  181. n0 := n
  182. n = n*10 + digit
  183. if n < n0 {
  184. panic(todo(""))
  185. }
  186. }
  187. if !match {
  188. break
  189. }
  190. if !discard {
  191. arg := VaUintptr(args)
  192. v := int64(n)
  193. if neg {
  194. v = -v
  195. }
  196. switch mod {
  197. case modNone:
  198. *(*int32)(unsafe.Pointer(arg)) = int32(v)
  199. case modH:
  200. *(*int16)(unsafe.Pointer(arg)) = int16(v)
  201. case modHH:
  202. *(*int8)(unsafe.Pointer(arg)) = int8(v)
  203. case modL:
  204. *(*long)(unsafe.Pointer(arg)) = long(v)
  205. case modLL:
  206. *(*int64)(unsafe.Pointer(arg)) = int64(v)
  207. default:
  208. panic(todo("", mod))
  209. }
  210. }
  211. nvalues = 1
  212. case 'D':
  213. // Equivalent to ld; this exists only for backward compatibility. (Note:
  214. // thus only in libc4. In libc5 and glibc the %D is silently ignored, causing
  215. // old programs to fail mysteriously.)
  216. format++
  217. panic(todo(""))
  218. case 'i':
  219. // Matches an optionally signed integer; the next pointer must be a pointer to
  220. // int. The integer is read in base 16 if it begins with 0x or 0X, in base 8
  221. // if it begins with 0, and in base 10 otherwise. Only characters that
  222. // correspond to the base are used.
  223. format++
  224. panic(todo(""))
  225. case 'o':
  226. // Matches an unsigned octal integer; the next pointer must be a pointer to
  227. // unsigned int.
  228. format++
  229. panic(todo(""))
  230. case 'u':
  231. // Matches an unsigned decimal integer; the next pointer must be a pointer to
  232. // unsigned int.
  233. format++
  234. panic(todo(""))
  235. case 'x', 'X':
  236. // Matches an unsigned hexadecimal integer; the next pointer must be a pointer
  237. // to unsigned int.
  238. format++
  239. skipReaderWhiteSpace(r)
  240. var digit, n uint64
  241. allowPrefix := true
  242. var b []byte
  243. hex:
  244. for ; width != 0; width-- {
  245. c, err := r.ReadByte()
  246. if err != nil {
  247. if match || err == io.EOF {
  248. break hex
  249. }
  250. panic(todo("", err))
  251. }
  252. if allowPrefix {
  253. if len(b) == 1 && b[0] == '0' && (c == 'x' || c == 'X') {
  254. allowPrefix = false
  255. match = false
  256. b = nil
  257. continue
  258. }
  259. b = append(b, c)
  260. }
  261. switch {
  262. case c >= '0' && c <= '9':
  263. digit = uint64(c) - '0'
  264. case c >= 'a' && c <= 'f':
  265. digit = uint64(c) - 'a' + 10
  266. case c >= 'A' && c <= 'F':
  267. digit = uint64(c) - 'A' + 10
  268. default:
  269. r.UnreadByte()
  270. break hex
  271. }
  272. match = true
  273. n0 := n
  274. n = n<<4 + digit
  275. if n < n0 {
  276. panic(todo(""))
  277. }
  278. }
  279. if !match {
  280. break
  281. }
  282. if !discard {
  283. arg := VaUintptr(args)
  284. switch mod {
  285. case modNone:
  286. *(*uint32)(unsafe.Pointer(arg)) = uint32(n)
  287. case modH:
  288. *(*uint16)(unsafe.Pointer(arg)) = uint16(n)
  289. case modHH:
  290. *(*byte)(unsafe.Pointer(arg)) = byte(n)
  291. case modL:
  292. *(*ulong)(unsafe.Pointer(arg)) = ulong(n)
  293. default:
  294. panic(todo(""))
  295. }
  296. }
  297. nvalues = 1
  298. case 'f', 'e', 'g', 'E', 'a':
  299. // Matches an optionally signed floating-point number; the next pointer must be
  300. // a pointer to float.
  301. format++
  302. skipReaderWhiteSpace(r)
  303. seq := fpLiteral(r)
  304. if len(seq) == 0 {
  305. return 0, 0, false
  306. }
  307. var neg bool
  308. switch seq[0] {
  309. case '+':
  310. seq = seq[1:]
  311. case '-':
  312. neg = true
  313. seq = seq[1:]
  314. }
  315. n, err := strconv.ParseFloat(string(seq), 64)
  316. if err != nil {
  317. panic(todo("", err))
  318. }
  319. if !discard {
  320. arg := VaUintptr(args)
  321. if neg {
  322. n = -n
  323. }
  324. switch mod {
  325. case modNone:
  326. *(*float32)(unsafe.Pointer(arg)) = float32(n)
  327. case modL:
  328. *(*float64)(unsafe.Pointer(arg)) = n
  329. default:
  330. panic(todo("", mod, neg, n))
  331. }
  332. }
  333. return format, 1, true
  334. case 's':
  335. // Matches a sequence of non-white-space characters; the next pointer must be
  336. // a pointer to the initial element of a character array that is long enough to
  337. // hold the input sequence and the terminating null byte ('\0'), which is added
  338. // automatically. The input string stops at white space or at the maximum
  339. // field width, whichever occurs first.
  340. var c byte
  341. var err error
  342. var arg uintptr
  343. if !discard {
  344. arg = VaUintptr(args)
  345. }
  346. scans:
  347. for ; width != 0; width-- {
  348. if c, err = r.ReadByte(); err != nil {
  349. if err != io.EOF {
  350. nvalues = -1
  351. }
  352. break scans
  353. }
  354. switch c {
  355. case ' ', '\t', '\n', '\r', '\v', '\f':
  356. break scans
  357. }
  358. nvalues = 1
  359. match = true
  360. if !discard {
  361. *(*byte)(unsafe.Pointer(arg)) = c
  362. arg++
  363. }
  364. }
  365. if match {
  366. switch {
  367. case width == 0:
  368. r.UnreadByte()
  369. fallthrough
  370. default:
  371. if !discard {
  372. *(*byte)(unsafe.Pointer(arg)) = 0
  373. }
  374. }
  375. }
  376. case 'c':
  377. // Matches a sequence of characters whose length is specified by the maximum
  378. // field width (default 1); the next pointer must be a pointer to char, and
  379. // there must be enough room for all the characters (no terminating null byte
  380. // is added). The usual skip of leading white space is suppressed. To skip
  381. // white space first, use an explicit space in the format.
  382. format++
  383. panic(todo(""))
  384. case '[':
  385. // Matches a nonempty sequence of characters from the specified set of
  386. // accepted characters; the next pointer must be a pointer to char, and there
  387. // must be enough room for all the char‐ acters in the string, plus a
  388. // terminating null byte. The usual skip of leading white space is suppressed.
  389. // The string is to be made up of characters in (or not in) a particular set;
  390. // the set is defined by the characters between the open bracket [ character
  391. // and a close bracket ] character. The set excludes those characters if the
  392. // first character after the open bracket is a circumflex (^). To include a
  393. // close bracket in the set, make it the first character after the open bracket
  394. // or the circumflex; any other position will end the set. The hyphen
  395. // character - is also special; when placed between two other characters, it
  396. // adds all intervening characters to the set. To include a hyphen, make it
  397. // the last character before the final close bracket. For instance, [^]0-9-]
  398. // means the set "everything except close bracket, zero through nine, and
  399. // hyphen". The string ends with the appearance of a character not in the
  400. // (or, with a circumflex, in) set or when the field width runs out.
  401. format++
  402. var re0 []byte
  403. bracket:
  404. for i := 0; ; i++ {
  405. c := *(*byte)(unsafe.Pointer(format))
  406. format++
  407. if c == ']' && i != 0 {
  408. break bracket
  409. }
  410. re0 = append(re0, c)
  411. }
  412. set := map[byte]struct{}{}
  413. re := string(re0)
  414. neg := strings.HasPrefix(re, "^")
  415. if neg {
  416. re = re[1:]
  417. }
  418. for len(re) != 0 {
  419. switch {
  420. case len(re) >= 3 && re[1] == '-':
  421. for c := re[0]; c <= re[2]; c++ {
  422. set[c] = struct{}{}
  423. }
  424. re = re[3:]
  425. default:
  426. set[c] = struct{}{}
  427. re = re[1:]
  428. }
  429. }
  430. var arg uintptr
  431. if !discard {
  432. arg = VaUintptr(args)
  433. }
  434. for ; width != 0; width-- {
  435. c, err := r.ReadByte()
  436. if err != nil {
  437. if err == io.EOF {
  438. return format, nvalues, match
  439. }
  440. return format, -1, match
  441. }
  442. if _, ok := set[c]; ok == !neg {
  443. match = true
  444. nvalues = 1
  445. if !discard {
  446. *(*byte)(unsafe.Pointer(arg)) = c
  447. arg++
  448. }
  449. }
  450. }
  451. if match {
  452. switch {
  453. case width == 0:
  454. r.UnreadByte()
  455. fallthrough
  456. default:
  457. if !discard {
  458. *(*byte)(unsafe.Pointer(arg)) = 0
  459. }
  460. }
  461. }
  462. case 'p':
  463. // Matches a pointer value (as printed by %p in printf(3); the next pointer
  464. // must be a pointer to a pointer to void.
  465. format++
  466. skipReaderWhiteSpace(r)
  467. c, err := r.ReadByte()
  468. if err != nil {
  469. panic(todo("", err))
  470. }
  471. if c == '0' {
  472. if c, err = r.ReadByte(); err != nil {
  473. panic(todo("", err))
  474. }
  475. if c != 'x' && c != 'X' {
  476. r.UnreadByte()
  477. }
  478. }
  479. var digit, n uint64
  480. ptr:
  481. for ; width != 0; width-- {
  482. c, err := r.ReadByte()
  483. if err != nil {
  484. if match {
  485. break ptr
  486. }
  487. panic(todo(""))
  488. }
  489. switch {
  490. case c >= '0' && c <= '9':
  491. digit = uint64(c) - '0'
  492. case c >= 'a' && c <= 'f':
  493. digit = uint64(c) - 'a' + 10
  494. case c >= 'A' && c <= 'F':
  495. digit = uint64(c) - 'A' + 10
  496. default:
  497. r.UnreadByte()
  498. break ptr
  499. }
  500. match = true
  501. n0 := n
  502. n = n<<4 + digit
  503. if n < n0 {
  504. panic(todo(""))
  505. }
  506. }
  507. if !match {
  508. break
  509. }
  510. if !discard {
  511. arg := VaUintptr(args)
  512. *(*uintptr)(unsafe.Pointer(arg)) = uintptr(n)
  513. }
  514. nvalues = 1
  515. case 'n':
  516. // Nothing is expected; instead, the number of characters consumed thus far
  517. // from the input is stored through the next pointer, which must be a pointer
  518. // to int. This is not a conversion and does not increase the count returned
  519. // by the function. The assignment can be suppressed with the *
  520. // assignment-suppression character, but the effect on the return value is
  521. // undefined. Therefore %*n conversions should not be used.
  522. format++
  523. panic(todo(""))
  524. default:
  525. panic(todo("%#U", c))
  526. }
  527. return format, nvalues, match
  528. }
  529. func skipReaderWhiteSpace(r io.ByteScanner) error {
  530. for {
  531. c, err := r.ReadByte()
  532. if err != nil {
  533. return err
  534. }
  535. switch c {
  536. case ' ', '\t', '\n', '\r', '\v', '\f':
  537. // ok
  538. default:
  539. r.UnreadByte()
  540. return nil
  541. }
  542. }
  543. }
  544. func skipWhiteSpace(s uintptr) uintptr {
  545. for {
  546. switch c := *(*byte)(unsafe.Pointer(s)); c {
  547. case ' ', '\t', '\n', '\r', '\v', '\f':
  548. s++
  549. default:
  550. return s
  551. }
  552. }
  553. }
  554. // [-+]?([0-9]*[.])?[0-9]+([eE][-+]?\d+)?
  555. func fpLiteral(rd io.ByteScanner) (seq []byte) {
  556. const endOfText = 0x110000
  557. var pos, width, length int
  558. defer func() {
  559. if len(seq) > length {
  560. rd.UnreadByte()
  561. seq = seq[:len(seq)-1]
  562. }
  563. }()
  564. var r rune
  565. step := func(pos int) (rune, int) {
  566. b, err := rd.ReadByte()
  567. if err != nil {
  568. return endOfText, 0
  569. }
  570. seq = append(seq, b)
  571. return rune(b), 1
  572. }
  573. move := func() {
  574. pos += width
  575. if r != endOfText {
  576. r, width = step(pos + width)
  577. }
  578. }
  579. accept := func(x rune) bool {
  580. if r == x {
  581. move()
  582. return true
  583. }
  584. return false
  585. }
  586. accept2 := func(x rune) bool {
  587. if r <= x {
  588. move()
  589. return true
  590. }
  591. return false
  592. }
  593. r = endOfText
  594. width = 0
  595. r, width = step(pos)
  596. if accept('.') {
  597. goto l7
  598. }
  599. if accept('+') {
  600. goto l30
  601. }
  602. if accept('-') {
  603. goto l30
  604. }
  605. if r < '0' {
  606. goto l4out
  607. }
  608. if accept2('9') {
  609. goto l35
  610. }
  611. l4out:
  612. return seq
  613. l7:
  614. if r < '0' {
  615. goto l7out
  616. }
  617. if accept2('9') {
  618. goto l10
  619. }
  620. l7out:
  621. return seq
  622. l10:
  623. length = pos
  624. if accept('E') {
  625. goto l18
  626. }
  627. if accept('e') {
  628. goto l18
  629. }
  630. if r < '0' {
  631. goto l15out
  632. }
  633. if accept2('9') {
  634. goto l10
  635. }
  636. l15out:
  637. return seq
  638. l18:
  639. if accept('+') {
  640. goto l23
  641. }
  642. if accept('-') {
  643. goto l23
  644. }
  645. if r < '0' {
  646. goto l20out
  647. }
  648. if accept2('9') {
  649. goto l26
  650. }
  651. l20out:
  652. return seq
  653. l23:
  654. if r < '0' {
  655. goto l23out
  656. }
  657. if accept2('9') {
  658. goto l26
  659. }
  660. l23out:
  661. return seq
  662. l26:
  663. length = pos
  664. if r < '0' {
  665. goto l27out
  666. }
  667. if accept2('9') {
  668. goto l26
  669. }
  670. l27out:
  671. return seq
  672. l30:
  673. if accept('.') {
  674. goto l7
  675. }
  676. if r < '0' {
  677. goto l32out
  678. }
  679. if accept2('9') {
  680. goto l35
  681. }
  682. l32out:
  683. return seq
  684. l35:
  685. length = pos
  686. if accept('.') {
  687. goto l7
  688. }
  689. if accept('E') {
  690. goto l18
  691. }
  692. if accept('e') {
  693. goto l18
  694. }
  695. if r < '0' {
  696. goto l42out
  697. }
  698. if accept2('9') {
  699. goto l35
  700. }
  701. l42out:
  702. return seq
  703. }