SCAN.ob07 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783
  1. (*
  2. BSD 2-Clause License
  3. Copyright (c) 2018-2021, Anton Krotov
  4. All rights reserved.
  5. *)
  6. MODULE SCAN;
  7. IMPORT TXT := TEXTDRV, ARITH, S := STRINGS, ERRORS, LISTS;
  8. CONST
  9. NUMLEN = 256;
  10. IDLEN = 256;
  11. TEXTLEN = 512;
  12. lxUNDEF* = 0; lxIDENT* = 1; lxINTEGER* = 2; lxHEX* = 3;
  13. lxCHAR* = 4; lxFLOAT* = 5; lxSTRING* = 6; lxCOMMENT* = 7;
  14. lxEOF* = 8;
  15. lxPLUS* = 21; lxMINUS* = 22; lxMUL* = 23; lxSLASH* = 24;
  16. lxNOT* = 25; lxAND* = 26; lxPOINT* = 27; lxCOMMA* = 28;
  17. lxSEMI* = 29; lxBAR* = 30; lxLROUND* = 31; lxLSQUARE* = 32;
  18. lxLCURLY* = 33; lxCARET* = 34; lxEQ* = 35; lxNE* = 36;
  19. lxLT* = 37; lxGT* = 38; lxCOLON* = 39; lxRROUND* = 40;
  20. lxRSQUARE* = 41; lxRCURLY* = 42; lxLE* = 43; lxGE* = 44;
  21. lxASSIGN* = 45; lxRANGE* = 46;
  22. lxKW = 51;
  23. lxARRAY* = 51; lxBEGIN* = 52; lxBY* = 53; lxCASE* = 54;
  24. lxCONST* = 55; lxDIV* = 56; lxDO* = 57; lxELSE* = 58;
  25. lxELSIF* = 59; lxEND* = 60; lxFALSE* = 61; lxFOR* = 62;
  26. lxIF* = 63; lxIMPORT* = 64; lxIN* = 65; lxIS* = 66;
  27. lxMOD* = 67; lxMODULE* = 68; lxNIL* = 69; lxOF* = 70;
  28. lxOR* = 71; lxPOINTER* = 72; lxPROCEDURE* = 73; lxRECORD* = 74;
  29. lxREPEAT* = 75; lxRETURN* = 76; lxTHEN* = 77; lxTO* = 78;
  30. lxTRUE* = 79; lxTYPE* = 80; lxUNTIL* = 81; lxVAR* = 82;
  31. lxWHILE* = 83;
  32. lxERROR01* = -1; lxERROR02* = -2; lxERROR03* = -3; lxERROR04* = -4;
  33. lxERROR05* = -5; (*lxERROR06* = -6;*) lxERROR07* = -7; lxERROR08* = -8;
  34. lxERROR09* = -9; lxERROR10* = -10; lxERROR11* = -11; lxERROR12* = -12;
  35. lxERROR13* = -13;
  36. TYPE
  37. TEXTSTR* = ARRAY TEXTLEN OF CHAR;
  38. IDSTR* = ARRAY IDLEN OF CHAR;
  39. DEF = POINTER TO RECORD (LISTS.ITEM)
  40. ident: IDSTR
  41. END;
  42. STRING* = POINTER TO RECORD (LISTS.ITEM)
  43. s*: TEXTSTR;
  44. offset*, offsetW*, hash: INTEGER
  45. END;
  46. IDENT* = RECORD
  47. s*: IDSTR;
  48. hash*: INTEGER
  49. END;
  50. POSITION* = RECORD
  51. line*, col*: INTEGER
  52. END;
  53. LEX* = RECORD
  54. sym*: INTEGER;
  55. pos*: POSITION;
  56. ident*: IDENT;
  57. string*: STRING;
  58. value*: ARITH.VALUE;
  59. error*: INTEGER
  60. END;
  61. SCANNER* = TXT.TEXT;
  62. KEYWORD = ARRAY 10 OF CHAR;
  63. VAR
  64. delimiters: ARRAY 256 OF BOOLEAN;
  65. upto, LowerCase, _if: BOOLEAN;
  66. strings, def: LISTS.LIST;
  67. KW: ARRAY 33 OF RECORD upper, lower: KEYWORD; uhash, lhash: INTEGER END;
  68. PROCEDURE enterKW (s: KEYWORD; idx: INTEGER);
  69. BEGIN
  70. KW[idx].lower := s;
  71. KW[idx].upper := s;
  72. S.UpCase(KW[idx].upper);
  73. KW[idx].uhash := S.HashStr(KW[idx].upper);
  74. KW[idx].lhash := S.HashStr(KW[idx].lower);
  75. END enterKW;
  76. PROCEDURE checkKW (ident: IDENT): INTEGER;
  77. VAR
  78. i, res: INTEGER;
  79. BEGIN
  80. res := lxIDENT;
  81. i := 0;
  82. WHILE i < LEN(KW) DO
  83. IF (KW[i].uhash = ident.hash) & (KW[i].upper = ident.s)
  84. OR LowerCase & (KW[i].lhash = ident.hash) & (KW[i].lower = ident.s) THEN
  85. res := i + lxKW;
  86. i := LEN(KW)
  87. END;
  88. INC(i)
  89. END
  90. RETURN res
  91. END checkKW;
  92. PROCEDURE enterStr* (s: TEXTSTR): STRING;
  93. VAR
  94. str, res: STRING;
  95. hash: INTEGER;
  96. BEGIN
  97. hash := S.HashStr(s);
  98. str := strings.first(STRING);
  99. res := NIL;
  100. WHILE str # NIL DO
  101. IF (str.hash = hash) & (str.s = s) THEN
  102. res := str;
  103. str := NIL
  104. ELSE
  105. str := str.next(STRING)
  106. END
  107. END;
  108. IF res = NIL THEN
  109. NEW(res);
  110. res.s := s;
  111. res.offset := -1;
  112. res.offsetW := -1;
  113. res.hash := hash;
  114. LISTS.push(strings, res)
  115. END
  116. RETURN res
  117. END enterStr;
  118. PROCEDURE nextc (text: TXT.TEXT): CHAR;
  119. BEGIN
  120. TXT.next(text)
  121. RETURN text.peak
  122. END nextc;
  123. PROCEDURE setIdent* (VAR ident: IDENT; s: IDSTR);
  124. BEGIN
  125. ident.s := s;
  126. ident.hash := S.HashStr(s)
  127. END setIdent;
  128. PROCEDURE ident (text: TXT.TEXT; VAR lex: LEX);
  129. VAR
  130. c: CHAR;
  131. i: INTEGER;
  132. BEGIN
  133. c := text.peak;
  134. ASSERT(S.letter(c));
  135. i := 0;
  136. WHILE (i < IDLEN - 1) & (S.letter(c) OR S.digit(c)) DO
  137. lex.ident.s[i] := c;
  138. INC(i);
  139. c := nextc(text)
  140. END;
  141. lex.ident.s[i] := 0X;
  142. lex.ident.hash := S.HashStr(lex.ident.s);
  143. lex.sym := checkKW(lex.ident);
  144. IF S.letter(c) OR S.digit(c) THEN
  145. ERRORS.WarningMsg(lex.pos.line, lex.pos.col, 2);
  146. WHILE S.letter(c) OR S.digit(c) DO
  147. c := nextc(text)
  148. END
  149. END
  150. END ident;
  151. PROCEDURE number (text: TXT.TEXT; VAR lex: LEX);
  152. TYPE
  153. NUMSTR = ARRAY NUMLEN OF CHAR;
  154. VAR
  155. c: CHAR;
  156. hex: BOOLEAN;
  157. error, sym, i: INTEGER;
  158. num: NUMSTR;
  159. PROCEDURE push (VAR num: NUMSTR; VAR i: INTEGER; c: CHAR);
  160. BEGIN
  161. IF i < NUMLEN - 1 THEN
  162. num[i] := c;
  163. INC(i)
  164. END
  165. END push;
  166. BEGIN
  167. c := text.peak;
  168. ASSERT(S.digit(c));
  169. i := 0;
  170. error := 0;
  171. sym := lxINTEGER;
  172. hex := FALSE;
  173. WHILE S.digit(c) DO
  174. push(num, i, c);
  175. c := nextc(text)
  176. END;
  177. WHILE S.hexdigit(c) OR LowerCase & ("a" <= c) & (c <= "f") DO
  178. S.cap(c);
  179. push(num, i, c);
  180. c := nextc(text);
  181. hex := TRUE
  182. END;
  183. IF (c = "H") OR LowerCase & (c = "h") THEN
  184. push(num, i, c);
  185. TXT.next(text);
  186. sym := lxHEX
  187. ELSIF (c = "X") OR LowerCase & (c = "x") THEN
  188. push(num, i, c);
  189. TXT.next(text);
  190. sym := lxCHAR
  191. ELSIF c = "." THEN
  192. IF hex THEN
  193. sym := lxERROR01
  194. ELSE
  195. c := nextc(text);
  196. IF c # "." THEN
  197. push(num, i, ".");
  198. sym := lxFLOAT
  199. ELSE
  200. sym := lxINTEGER;
  201. text.peak := 7FX;
  202. upto := TRUE
  203. END;
  204. WHILE S.digit(c) DO
  205. push(num, i, c);
  206. c := nextc(text)
  207. END;
  208. IF (c = "E") OR LowerCase & (c = "e") THEN
  209. push(num, i, c);
  210. c := nextc(text);
  211. IF (c = "+") OR (c = "-") THEN
  212. push(num, i, c);
  213. c := nextc(text)
  214. END;
  215. IF S.digit(c) THEN
  216. WHILE S.digit(c) DO
  217. push(num, i, c);
  218. c := nextc(text)
  219. END
  220. ELSE
  221. sym := lxERROR02
  222. END
  223. END
  224. END
  225. ELSIF hex THEN
  226. sym := lxERROR01
  227. END;
  228. IF (i = NUMLEN - 1) & (sym >= 0) THEN
  229. sym := lxERROR07
  230. END;
  231. num[i] := 0X;
  232. IF sym = lxINTEGER THEN
  233. ARITH.iconv(num, lex.value, error)
  234. ELSIF (sym = lxHEX) OR (sym = lxCHAR) THEN
  235. ARITH.hconv(num, lex.value, error)
  236. ELSIF sym = lxFLOAT THEN
  237. ARITH.fconv(num, lex.value, error)
  238. END;
  239. CASE error OF
  240. |0:
  241. |1: sym := lxERROR08
  242. |2: sym := lxERROR09
  243. |3: sym := lxERROR10
  244. |4: sym := lxERROR11
  245. |5: sym := lxERROR12
  246. END;
  247. lex.sym := sym
  248. END number;
  249. PROCEDURE string (text: TXT.TEXT; VAR lex: LEX; quot: CHAR);
  250. VAR
  251. c: CHAR;
  252. i: INTEGER;
  253. str: TEXTSTR;
  254. BEGIN
  255. c := nextc(text);
  256. i := 0;
  257. WHILE (i < LEN(str) - 1) & (c # quot) & (c # 0X) & ~text.eol & ~text.eof DO
  258. str[i] := c;
  259. c := nextc(text);
  260. INC(i)
  261. END;
  262. str[i] := 0X;
  263. IF (i = LEN(str) - 1) & (c # quot) & (c # 0X) & ~text.eol & ~text.eof THEN
  264. lex.sym := lxERROR05
  265. END;
  266. IF c = quot THEN
  267. TXT.next(text);
  268. IF i # 1 THEN
  269. lex.sym := lxSTRING
  270. ELSE
  271. lex.sym := lxCHAR;
  272. ARITH.setChar(lex.value, ORD(str[0]))
  273. END
  274. ELSIF lex.sym # lxERROR05 THEN
  275. lex.sym := lxERROR03
  276. END;
  277. IF lex.sym = lxSTRING THEN
  278. lex.string := enterStr(str);
  279. lex.value.typ := ARITH.tSTRING;
  280. lex.value.string := lex.string
  281. END
  282. END string;
  283. PROCEDURE comment (text: TXT.TEXT);
  284. VAR
  285. c: CHAR;
  286. cond, depth: INTEGER;
  287. BEGIN
  288. cond := 0;
  289. depth := 1;
  290. REPEAT
  291. c := text.peak;
  292. TXT.next(text);
  293. IF c = "*" THEN
  294. IF cond = 1 THEN
  295. cond := 0;
  296. INC(depth)
  297. ELSE
  298. cond := 2
  299. END
  300. ELSIF c = ")" THEN
  301. IF cond = 2 THEN
  302. DEC(depth)
  303. END;
  304. cond := 0
  305. ELSIF c = "(" THEN
  306. cond := 1
  307. ELSE
  308. cond := 0
  309. END
  310. UNTIL (depth = 0) OR text.eof
  311. END comment;
  312. PROCEDURE delimiter (text: TXT.TEXT; c: CHAR): INTEGER;
  313. VAR
  314. sym: INTEGER;
  315. c0: CHAR;
  316. BEGIN
  317. c0 := c;
  318. c := nextc(text);
  319. CASE c0 OF
  320. |"+":
  321. sym := lxPLUS
  322. |"-":
  323. sym := lxMINUS
  324. |"*":
  325. sym := lxMUL
  326. |"/":
  327. sym := lxSLASH;
  328. IF c = "/" THEN
  329. sym := lxCOMMENT;
  330. REPEAT
  331. TXT.next(text)
  332. UNTIL text.eol OR text.eof
  333. END
  334. |"~":
  335. sym := lxNOT
  336. |"&":
  337. sym := lxAND
  338. |".":
  339. sym := lxPOINT;
  340. IF c = "." THEN
  341. sym := lxRANGE;
  342. TXT.next(text)
  343. END
  344. |",":
  345. sym := lxCOMMA
  346. |";":
  347. sym := lxSEMI
  348. |"|":
  349. sym := lxBAR
  350. |"(":
  351. sym := lxLROUND;
  352. IF c = "*" THEN
  353. sym := lxCOMMENT;
  354. TXT.next(text);
  355. comment(text)
  356. END
  357. |"[":
  358. sym := lxLSQUARE
  359. |"{":
  360. sym := lxLCURLY
  361. |"^":
  362. sym := lxCARET
  363. |"=":
  364. sym := lxEQ
  365. |"#":
  366. sym := lxNE
  367. |"<":
  368. sym := lxLT;
  369. IF c = "=" THEN
  370. sym := lxLE;
  371. TXT.next(text)
  372. END
  373. |">":
  374. sym := lxGT;
  375. IF c = "=" THEN
  376. sym := lxGE;
  377. TXT.next(text)
  378. END
  379. |":":
  380. sym := lxCOLON;
  381. IF c = "=" THEN
  382. sym := lxASSIGN;
  383. TXT.next(text)
  384. END
  385. |")":
  386. sym := lxRROUND
  387. |"]":
  388. sym := lxRSQUARE
  389. |"}":
  390. sym := lxRCURLY
  391. END
  392. RETURN sym
  393. END delimiter;
  394. PROCEDURE Next* (text: SCANNER; VAR lex: LEX);
  395. VAR
  396. c: CHAR;
  397. PROCEDURE check (cond: BOOLEAN; text: SCANNER; lex: LEX; errno: INTEGER);
  398. BEGIN
  399. IF ~cond THEN
  400. ERRORS.ErrorMsg(text.fname, lex.pos.line, lex.pos.col, errno)
  401. END
  402. END check;
  403. PROCEDURE IsDef (str: ARRAY OF CHAR): BOOLEAN;
  404. VAR
  405. cur: DEF;
  406. BEGIN
  407. cur := def.first(DEF);
  408. WHILE (cur # NIL) & (cur.ident # str) DO
  409. cur := cur.next(DEF)
  410. END
  411. RETURN cur # NIL
  412. END IsDef;
  413. PROCEDURE Skip (text: SCANNER);
  414. VAR
  415. i: INTEGER;
  416. BEGIN
  417. i := 0;
  418. WHILE (i <= text.ifc) & ~text._skip[i] DO
  419. INC(i)
  420. END;
  421. text.skip := i <= text.ifc
  422. END Skip;
  423. PROCEDURE prep_if (text: SCANNER; VAR lex: LEX);
  424. VAR
  425. skip: BOOLEAN;
  426. BEGIN
  427. INC(text.ifc);
  428. text._elsif[text.ifc] := lex.sym = lxELSIF;
  429. IF lex.sym = lxIF THEN
  430. INC(text.elsec);
  431. text._else[text.elsec] := FALSE
  432. END;
  433. _if := TRUE;
  434. skip := TRUE;
  435. text.skip := FALSE;
  436. Next(text, lex);
  437. check(lex.sym = lxLROUND, text, lex, 64);
  438. Next(text, lex);
  439. check(lex.sym = lxIDENT, text, lex, 22);
  440. REPEAT
  441. IF IsDef(lex.ident.s) THEN
  442. skip := FALSE
  443. END;
  444. Next(text, lex);
  445. IF lex.sym = lxBAR THEN
  446. Next(text, lex);
  447. check(lex.sym = lxIDENT, text, lex, 22)
  448. ELSE
  449. check(lex.sym = lxRROUND, text, lex, 33)
  450. END
  451. UNTIL lex.sym = lxRROUND;
  452. _if := FALSE;
  453. text._skip[text.ifc] := skip;
  454. Skip(text);
  455. Next(text, lex)
  456. END prep_if;
  457. PROCEDURE prep_end (text: SCANNER; VAR lex: LEX);
  458. BEGIN
  459. check(text.ifc > 0, text, lex, 118);
  460. IF lex.sym = lxEND THEN
  461. WHILE text._elsif[text.ifc] DO
  462. DEC(text.ifc)
  463. END;
  464. DEC(text.ifc);
  465. DEC(text.elsec)
  466. ELSIF (lex.sym = lxELSE) OR (lex.sym = lxELSIF) THEN
  467. check(~text._else[text.elsec], text, lex, 118);
  468. text._skip[text.ifc] := ~text._skip[text.ifc];
  469. text._else[text.elsec] := lex.sym = lxELSE
  470. END;
  471. Skip(text);
  472. IF lex.sym = lxELSIF THEN
  473. prep_if(text, lex)
  474. ELSE
  475. Next(text, lex)
  476. END
  477. END prep_end;
  478. BEGIN
  479. REPEAT
  480. c := text.peak;
  481. WHILE S.space(c) DO
  482. c := nextc(text)
  483. END;
  484. lex.pos.line := text.line;
  485. lex.pos.col := text.col;
  486. IF S.letter(c) THEN
  487. ident(text, lex)
  488. ELSIF S.digit(c) THEN
  489. number(text, lex)
  490. ELSIF (c = '"') OR (c = "'") THEN
  491. string(text, lex, c)
  492. ELSIF delimiters[ORD(c)] THEN
  493. lex.sym := delimiter(text, c)
  494. ELSIF c = "$" THEN
  495. IF S.letter(nextc(text)) THEN
  496. ident(text, lex);
  497. IF lex.sym = lxIF THEN
  498. IF ~_if THEN
  499. prep_if(text, lex)
  500. END
  501. ELSIF (lex.sym = lxEND) OR (lex.sym = lxELSE) OR (lex.sym = lxELSIF) THEN
  502. IF ~_if THEN
  503. prep_end(text, lex)
  504. END
  505. ELSE
  506. check(FALSE, text, lex, 119)
  507. END
  508. ELSE
  509. check(FALSE, text, lex, 119)
  510. END
  511. ELSIF c = 0X THEN
  512. lex.sym := lxEOF;
  513. text.skip := FALSE;
  514. IF text.eof THEN
  515. INC(lex.pos.col)
  516. END
  517. ELSIF (c = 7FX) & upto THEN
  518. upto := FALSE;
  519. lex.sym := lxRANGE;
  520. DEC(lex.pos.col);
  521. TXT.next(text)
  522. ELSE
  523. TXT.next(text);
  524. lex.sym := lxERROR04
  525. END;
  526. IF lex.sym < 0 THEN
  527. lex.error := -lex.sym
  528. ELSE
  529. lex.error := 0
  530. END
  531. UNTIL (lex.sym # lxCOMMENT) & ~text.skip
  532. END Next;
  533. PROCEDURE open* (name: ARRAY OF CHAR): SCANNER;
  534. RETURN TXT.open(name)
  535. END open;
  536. PROCEDURE close* (VAR scanner: SCANNER);
  537. BEGIN
  538. TXT.close(scanner)
  539. END close;
  540. PROCEDURE init* (lower: BOOLEAN);
  541. VAR
  542. i: INTEGER;
  543. delim: ARRAY 23 OF CHAR;
  544. BEGIN
  545. upto := FALSE;
  546. LowerCase := lower;
  547. FOR i := 0 TO 255 DO
  548. delimiters[i] := FALSE
  549. END;
  550. delim := "+-*/~&.,;|([{^=#<>:)]}";
  551. FOR i := 0 TO LEN(delim) - 2 DO
  552. delimiters[ORD(delim[i])] := TRUE
  553. END;
  554. enterKW("array", 0);
  555. enterKW("begin", 1);
  556. enterKW("by", 2);
  557. enterKW("case", 3);
  558. enterKW("const", 4);
  559. enterKW("div", 5);
  560. enterKW("do", 6);
  561. enterKW("else", 7);
  562. enterKW("elsif", 8);
  563. enterKW("end", 9);
  564. enterKW("false", 10);
  565. enterKW("for", 11);
  566. enterKW("if", 12);
  567. enterKW("import", 13);
  568. enterKW("in", 14);
  569. enterKW("is", 15);
  570. enterKW("mod", 16);
  571. enterKW("module", 17);
  572. enterKW("nil", 18);
  573. enterKW("of", 19);
  574. enterKW("or", 20);
  575. enterKW("pointer", 21);
  576. enterKW("procedure", 22);
  577. enterKW("record", 23);
  578. enterKW("repeat", 24);
  579. enterKW("return", 25);
  580. enterKW("then", 26);
  581. enterKW("to", 27);
  582. enterKW("true", 28);
  583. enterKW("type", 29);
  584. enterKW("until", 30);
  585. enterKW("var", 31);
  586. enterKW("while", 32)
  587. END init;
  588. PROCEDURE NewDef* (str: ARRAY OF CHAR);
  589. VAR
  590. item: DEF;
  591. BEGIN
  592. NEW(item);
  593. COPY(str, item.ident);
  594. LISTS.push(def, item)
  595. END NewDef;
  596. BEGIN
  597. def := LISTS.create(NIL);
  598. strings := LISTS.create(NIL)
  599. END SCAN.