SCAN.ob07 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776
  1. MODULE SCAN;
  2. IMPORT TXT := TEXTDRV, ARITH, S := STRINGS, ERRORS, LISTS;
  3. CONST
  4. NUMLEN = 256;
  5. IDLEN = 256;
  6. TEXTLEN = 512;
  7. lxUNDEF* = 0; lxIDENT* = 1; lxINTEGER* = 2; lxHEX* = 3;
  8. lxCHAR* = 4; lxFLOAT* = 5; lxSTRING* = 6; lxCOMMENT* = 7;
  9. lxEOF* = 8;
  10. lxPLUS* = 21; lxMINUS* = 22; lxMUL* = 23; lxSLASH* = 24;
  11. lxNOT* = 25; lxAND* = 26; lxPOINT* = 27; lxCOMMA* = 28;
  12. lxSEMI* = 29; lxBAR* = 30; lxLROUND* = 31; lxLSQUARE* = 32;
  13. lxLCURLY* = 33; lxCARET* = 34; lxEQ* = 35; lxNE* = 36;
  14. lxLT* = 37; lxGT* = 38; lxCOLON* = 39; lxRROUND* = 40;
  15. lxRSQUARE* = 41; lxRCURLY* = 42; lxLE* = 43; lxGE* = 44;
  16. lxASSIGN* = 45; lxRANGE* = 46;
  17. lxKW = 51;
  18. lxARRAY* = 51; lxBEGIN* = 52; lxBY* = 53; lxCASE* = 54;
  19. lxCONST* = 55; lxDIV* = 56; lxDO* = 57; lxELSE* = 58;
  20. lxELSIF* = 59; lxEND* = 60; lxFALSE* = 61; lxFOR* = 62;
  21. lxIF* = 63; lxIMPORT* = 64; lxIN* = 65; lxIS* = 66;
  22. lxMOD* = 67; lxMODULE* = 68; lxNIL* = 69; lxOF* = 70;
  23. lxOR* = 71; lxPOINTER* = 72; lxPROCEDURE* = 73; lxRECORD* = 74;
  24. lxREPEAT* = 75; lxRETURN* = 76; lxTHEN* = 77; lxTO* = 78;
  25. lxTRUE* = 79; lxTYPE* = 80; lxUNTIL* = 81; lxVAR* = 82;
  26. lxWHILE* = 83;
  27. lxERROR01* = -1; lxERROR02* = -2; lxERROR03* = -3; lxERROR04* = -4;
  28. lxERROR05* = -5; (*lxERROR06* = -6;*) lxERROR07* = -7; lxERROR08* = -8;
  29. lxERROR09* = -9; lxERROR10* = -10; lxERROR11* = -11; lxERROR12* = -12;
  30. lxERROR13* = -13;
  31. TYPE
  32. TEXTSTR* = ARRAY TEXTLEN OF CHAR;
  33. IDSTR* = ARRAY IDLEN OF CHAR;
  34. DEF = POINTER TO RECORD (LISTS.ITEM)
  35. ident: IDSTR
  36. END;
  37. STRING* = POINTER TO RECORD (LISTS.ITEM)
  38. s*: TEXTSTR;
  39. offset*, offsetW*, hash: INTEGER
  40. END;
  41. IDENT* = RECORD
  42. s*: IDSTR;
  43. hash*: INTEGER
  44. END;
  45. POSITION* = RECORD
  46. line*, col*: INTEGER
  47. END;
  48. LEX* = RECORD
  49. sym*: INTEGER;
  50. pos*: POSITION;
  51. ident*: IDENT;
  52. string*: STRING;
  53. value*: ARITH.VALUE;
  54. error*: INTEGER
  55. END;
  56. SCANNER* = TXT.TEXT;
  57. KEYWORD = ARRAY 10 OF CHAR;
  58. VAR
  59. delimiters: ARRAY 256 OF BOOLEAN;
  60. upto, LowerCase, _if: BOOLEAN;
  61. strings, def: LISTS.LIST;
  62. KW: ARRAY 33 OF RECORD upper, lower: KEYWORD; uhash, lhash: INTEGER END;
  63. PROCEDURE enterKW (s: KEYWORD; idx: INTEGER);
  64. BEGIN
  65. KW[idx].lower := s;
  66. KW[idx].upper := s;
  67. S.UpCase(KW[idx].upper);
  68. KW[idx].uhash := S.HashStr(KW[idx].upper);
  69. KW[idx].lhash := S.HashStr(KW[idx].lower);
  70. END enterKW;
  71. PROCEDURE checkKW (ident: IDENT): INTEGER;
  72. VAR
  73. i, res: INTEGER;
  74. BEGIN
  75. res := lxIDENT;
  76. i := 0;
  77. WHILE i < LEN(KW) DO
  78. IF (KW[i].uhash = ident.hash) & (KW[i].upper = ident.s)
  79. OR LowerCase & (KW[i].lhash = ident.hash) & (KW[i].lower = ident.s) THEN
  80. res := i + lxKW;
  81. i := LEN(KW)
  82. END;
  83. INC(i)
  84. END
  85. RETURN res
  86. END checkKW;
  87. PROCEDURE enterStr* (s: TEXTSTR): STRING;
  88. VAR
  89. str, res: STRING;
  90. hash: INTEGER;
  91. BEGIN
  92. hash := S.HashStr(s);
  93. str := strings.first(STRING);
  94. res := NIL;
  95. WHILE str # NIL DO
  96. IF (str.hash = hash) & (str.s = s) THEN
  97. res := str;
  98. str := NIL
  99. ELSE
  100. str := str.next(STRING)
  101. END
  102. END;
  103. IF res = NIL THEN
  104. NEW(res);
  105. res.s := s;
  106. res.offset := -1;
  107. res.offsetW := -1;
  108. res.hash := hash;
  109. LISTS.push(strings, res)
  110. END
  111. RETURN res
  112. END enterStr;
  113. PROCEDURE nextc (text: TXT.TEXT): CHAR;
  114. BEGIN
  115. TXT.next(text)
  116. RETURN text.peak
  117. END nextc;
  118. PROCEDURE setIdent* (VAR ident: IDENT; s: IDSTR);
  119. BEGIN
  120. ident.s := s;
  121. ident.hash := S.HashStr(s)
  122. END setIdent;
  123. PROCEDURE ident (text: TXT.TEXT; VAR lex: LEX);
  124. VAR
  125. c: CHAR;
  126. i: INTEGER;
  127. BEGIN
  128. c := text.peak;
  129. ASSERT(S.letter(c));
  130. i := 0;
  131. WHILE (i < IDLEN - 1) & (S.letter(c) OR S.digit(c)) DO
  132. lex.ident.s[i] := c;
  133. INC(i);
  134. c := nextc(text)
  135. END;
  136. lex.ident.s[i] := 0X;
  137. lex.ident.hash := S.HashStr(lex.ident.s);
  138. lex.sym := checkKW(lex.ident);
  139. IF S.letter(c) OR S.digit(c) THEN
  140. ERRORS.WarningMsg(lex.pos.line, lex.pos.col, 2);
  141. WHILE S.letter(c) OR S.digit(c) DO
  142. c := nextc(text)
  143. END
  144. END
  145. END ident;
  146. PROCEDURE number (text: TXT.TEXT; VAR lex: LEX);
  147. TYPE
  148. NUMSTR = ARRAY NUMLEN OF CHAR;
  149. VAR
  150. c: CHAR;
  151. hex: BOOLEAN;
  152. error, sym, i: INTEGER;
  153. num: NUMSTR;
  154. PROCEDURE push (VAR num: NUMSTR; VAR i: INTEGER; c: CHAR);
  155. BEGIN
  156. IF i < NUMLEN - 1 THEN
  157. num[i] := c;
  158. INC(i)
  159. END
  160. END push;
  161. BEGIN
  162. c := text.peak;
  163. ASSERT(S.digit(c));
  164. i := 0;
  165. error := 0;
  166. sym := lxINTEGER;
  167. hex := FALSE;
  168. WHILE S.digit(c) DO
  169. push(num, i, c);
  170. c := nextc(text)
  171. END;
  172. WHILE S.hexdigit(c) OR LowerCase & ("a" <= c) & (c <= "f") DO
  173. S.cap(c);
  174. push(num, i, c);
  175. c := nextc(text);
  176. hex := TRUE
  177. END;
  178. IF (c = "H") OR LowerCase & (c = "h") THEN
  179. push(num, i, c);
  180. TXT.next(text);
  181. sym := lxHEX
  182. ELSIF (c = "X") OR LowerCase & (c = "x") THEN
  183. push(num, i, c);
  184. TXT.next(text);
  185. sym := lxCHAR
  186. ELSIF c = "." THEN
  187. IF hex THEN
  188. sym := lxERROR01
  189. ELSE
  190. c := nextc(text);
  191. IF c # "." THEN
  192. push(num, i, ".");
  193. sym := lxFLOAT
  194. ELSE
  195. sym := lxINTEGER;
  196. text.peak := 7FX;
  197. upto := TRUE
  198. END;
  199. WHILE S.digit(c) DO
  200. push(num, i, c);
  201. c := nextc(text)
  202. END;
  203. IF (c = "E") OR LowerCase & (c = "e") THEN
  204. push(num, i, c);
  205. c := nextc(text);
  206. IF (c = "+") OR (c = "-") THEN
  207. push(num, i, c);
  208. c := nextc(text)
  209. END;
  210. IF S.digit(c) THEN
  211. WHILE S.digit(c) DO
  212. push(num, i, c);
  213. c := nextc(text)
  214. END
  215. ELSE
  216. sym := lxERROR02
  217. END
  218. END
  219. END
  220. ELSIF hex THEN
  221. sym := lxERROR01
  222. END;
  223. IF (i = NUMLEN - 1) & (sym >= 0) THEN
  224. sym := lxERROR07
  225. END;
  226. num[i] := 0X;
  227. IF sym = lxINTEGER THEN
  228. ARITH.iconv(num, lex.value, error)
  229. ELSIF (sym = lxHEX) OR (sym = lxCHAR) THEN
  230. ARITH.hconv(num, lex.value, error)
  231. ELSIF sym = lxFLOAT THEN
  232. ARITH.fconv(num, lex.value, error)
  233. END;
  234. CASE error OF
  235. |0:
  236. |1: sym := lxERROR08
  237. |2: sym := lxERROR09
  238. |3: sym := lxERROR10
  239. |4: sym := lxERROR11
  240. |5: sym := lxERROR12
  241. END;
  242. lex.sym := sym
  243. END number;
  244. PROCEDURE string (text: TXT.TEXT; VAR lex: LEX; quot: CHAR);
  245. VAR
  246. c: CHAR;
  247. i: INTEGER;
  248. str: TEXTSTR;
  249. BEGIN
  250. c := nextc(text);
  251. i := 0;
  252. WHILE (i < LEN(str) - 1) & (c # quot) & (c # 0X) & ~text.eol & ~text.eof DO
  253. str[i] := c;
  254. c := nextc(text);
  255. INC(i)
  256. END;
  257. str[i] := 0X;
  258. IF (i = LEN(str) - 1) & (c # quot) & (c # 0X) & ~text.eol & ~text.eof THEN
  259. lex.sym := lxERROR05
  260. END;
  261. IF c = quot THEN
  262. TXT.next(text);
  263. IF i # 1 THEN
  264. lex.sym := lxSTRING
  265. ELSE
  266. lex.sym := lxCHAR;
  267. ARITH.setChar(lex.value, ORD(str[0]))
  268. END
  269. ELSIF lex.sym # lxERROR05 THEN
  270. lex.sym := lxERROR03
  271. END;
  272. IF lex.sym = lxSTRING THEN
  273. lex.string := enterStr(str);
  274. lex.value.typ := ARITH.tSTRING;
  275. lex.value.string := lex.string
  276. END
  277. END string;
  278. PROCEDURE comment (text: TXT.TEXT);
  279. VAR
  280. c: CHAR;
  281. cond, depth: INTEGER;
  282. BEGIN
  283. cond := 0;
  284. depth := 1;
  285. REPEAT
  286. c := text.peak;
  287. TXT.next(text);
  288. IF c = "*" THEN
  289. IF cond = 1 THEN
  290. cond := 0;
  291. INC(depth)
  292. ELSE
  293. cond := 2
  294. END
  295. ELSIF c = ")" THEN
  296. IF cond = 2 THEN
  297. DEC(depth)
  298. END;
  299. cond := 0
  300. ELSIF c = "(" THEN
  301. cond := 1
  302. ELSE
  303. cond := 0
  304. END
  305. UNTIL (depth = 0) OR text.eof
  306. END comment;
  307. PROCEDURE delimiter (text: TXT.TEXT; c: CHAR): INTEGER;
  308. VAR
  309. sym: INTEGER;
  310. c0: CHAR;
  311. BEGIN
  312. c0 := c;
  313. c := nextc(text);
  314. CASE c0 OF
  315. |"+":
  316. sym := lxPLUS
  317. |"-":
  318. sym := lxMINUS
  319. |"*":
  320. sym := lxMUL
  321. |"/":
  322. sym := lxSLASH;
  323. IF c = "/" THEN
  324. sym := lxCOMMENT;
  325. REPEAT
  326. TXT.next(text)
  327. UNTIL text.eol OR text.eof
  328. END
  329. |"~":
  330. sym := lxNOT
  331. |"&":
  332. sym := lxAND
  333. |".":
  334. sym := lxPOINT;
  335. IF c = "." THEN
  336. sym := lxRANGE;
  337. TXT.next(text)
  338. END
  339. |",":
  340. sym := lxCOMMA
  341. |";":
  342. sym := lxSEMI
  343. |"|":
  344. sym := lxBAR
  345. |"(":
  346. sym := lxLROUND;
  347. IF c = "*" THEN
  348. sym := lxCOMMENT;
  349. TXT.next(text);
  350. comment(text)
  351. END
  352. |"[":
  353. sym := lxLSQUARE
  354. |"{":
  355. sym := lxLCURLY
  356. |"^":
  357. sym := lxCARET
  358. |"=":
  359. sym := lxEQ
  360. |"#":
  361. sym := lxNE
  362. |"<":
  363. sym := lxLT;
  364. IF c = "=" THEN
  365. sym := lxLE;
  366. TXT.next(text)
  367. END
  368. |">":
  369. sym := lxGT;
  370. IF c = "=" THEN
  371. sym := lxGE;
  372. TXT.next(text)
  373. END
  374. |":":
  375. sym := lxCOLON;
  376. IF c = "=" THEN
  377. sym := lxASSIGN;
  378. TXT.next(text)
  379. END
  380. |")":
  381. sym := lxRROUND
  382. |"]":
  383. sym := lxRSQUARE
  384. |"}":
  385. sym := lxRCURLY
  386. END
  387. RETURN sym
  388. END delimiter;
  389. PROCEDURE Next* (text: SCANNER; VAR lex: LEX);
  390. VAR
  391. c: CHAR;
  392. PROCEDURE check (cond: BOOLEAN; text: SCANNER; lex: LEX; errno: INTEGER);
  393. BEGIN
  394. IF ~cond THEN
  395. ERRORS.ErrorMsg(text.fname, lex.pos.line, lex.pos.col, errno)
  396. END
  397. END check;
  398. PROCEDURE IsDef (str: ARRAY OF CHAR): BOOLEAN;
  399. VAR
  400. cur: DEF;
  401. BEGIN
  402. cur := def.first(DEF);
  403. WHILE (cur # NIL) & (cur.ident # str) DO
  404. cur := cur.next(DEF)
  405. END
  406. RETURN cur # NIL
  407. END IsDef;
  408. PROCEDURE Skip (text: SCANNER);
  409. VAR
  410. i: INTEGER;
  411. BEGIN
  412. i := 0;
  413. WHILE (i <= text.ifc) & ~text._skip[i] DO
  414. INC(i)
  415. END;
  416. text.skip := i <= text.ifc
  417. END Skip;
  418. PROCEDURE prep_if (text: SCANNER; VAR lex: LEX);
  419. VAR
  420. skip: BOOLEAN;
  421. BEGIN
  422. INC(text.ifc);
  423. text._elsif[text.ifc] := lex.sym = lxELSIF;
  424. IF lex.sym = lxIF THEN
  425. INC(text.elsec);
  426. text._else[text.elsec] := FALSE
  427. END;
  428. _if := TRUE;
  429. skip := TRUE;
  430. text.skip := FALSE;
  431. Next(text, lex);
  432. check(lex.sym = lxLROUND, text, lex, 64);
  433. Next(text, lex);
  434. check(lex.sym = lxIDENT, text, lex, 22);
  435. REPEAT
  436. IF IsDef(lex.ident.s) THEN
  437. skip := FALSE
  438. END;
  439. Next(text, lex);
  440. IF lex.sym = lxBAR THEN
  441. Next(text, lex);
  442. check(lex.sym = lxIDENT, text, lex, 22)
  443. ELSE
  444. check(lex.sym = lxRROUND, text, lex, 33)
  445. END
  446. UNTIL lex.sym = lxRROUND;
  447. _if := FALSE;
  448. text._skip[text.ifc] := skip;
  449. Skip(text);
  450. Next(text, lex)
  451. END prep_if;
  452. PROCEDURE prep_end (text: SCANNER; VAR lex: LEX);
  453. BEGIN
  454. check(text.ifc > 0, text, lex, 118);
  455. IF lex.sym = lxEND THEN
  456. WHILE text._elsif[text.ifc] DO
  457. DEC(text.ifc)
  458. END;
  459. DEC(text.ifc);
  460. DEC(text.elsec)
  461. ELSIF (lex.sym = lxELSE) OR (lex.sym = lxELSIF) THEN
  462. check(~text._else[text.elsec], text, lex, 118);
  463. text._skip[text.ifc] := ~text._skip[text.ifc];
  464. text._else[text.elsec] := lex.sym = lxELSE
  465. END;
  466. Skip(text);
  467. IF lex.sym = lxELSIF THEN
  468. prep_if(text, lex)
  469. ELSE
  470. Next(text, lex)
  471. END
  472. END prep_end;
  473. BEGIN
  474. REPEAT
  475. c := text.peak;
  476. WHILE S.space(c) DO
  477. c := nextc(text)
  478. END;
  479. lex.pos.line := text.line;
  480. lex.pos.col := text.col;
  481. IF S.letter(c) THEN
  482. ident(text, lex)
  483. ELSIF S.digit(c) THEN
  484. number(text, lex)
  485. ELSIF (c = '"') OR (c = "'") THEN
  486. string(text, lex, c)
  487. ELSIF delimiters[ORD(c)] THEN
  488. lex.sym := delimiter(text, c)
  489. ELSIF c = "$" THEN
  490. IF S.letter(nextc(text)) THEN
  491. ident(text, lex);
  492. IF lex.sym = lxIF THEN
  493. IF ~_if THEN
  494. prep_if(text, lex)
  495. END
  496. ELSIF (lex.sym = lxEND) OR (lex.sym = lxELSE) OR (lex.sym = lxELSIF) THEN
  497. IF ~_if THEN
  498. prep_end(text, lex)
  499. END
  500. ELSE
  501. check(FALSE, text, lex, 119)
  502. END
  503. ELSE
  504. check(FALSE, text, lex, 119)
  505. END
  506. ELSIF c = 0X THEN
  507. lex.sym := lxEOF;
  508. text.skip := FALSE;
  509. IF text.eof THEN
  510. INC(lex.pos.col)
  511. END
  512. ELSIF (c = 7FX) & upto THEN
  513. upto := FALSE;
  514. lex.sym := lxRANGE;
  515. DEC(lex.pos.col);
  516. TXT.next(text)
  517. ELSE
  518. TXT.next(text);
  519. lex.sym := lxERROR04
  520. END;
  521. IF lex.sym < 0 THEN
  522. lex.error := -lex.sym
  523. ELSE
  524. lex.error := 0
  525. END
  526. UNTIL (lex.sym # lxCOMMENT) & ~text.skip
  527. END Next;
  528. PROCEDURE open* (name: ARRAY OF CHAR): SCANNER;
  529. RETURN TXT.open(name)
  530. END open;
  531. PROCEDURE close* (VAR scanner: SCANNER);
  532. BEGIN
  533. TXT.close(scanner)
  534. END close;
  535. PROCEDURE init* (lower: BOOLEAN);
  536. VAR
  537. i: INTEGER;
  538. delim: ARRAY 23 OF CHAR;
  539. BEGIN
  540. upto := FALSE;
  541. LowerCase := lower;
  542. FOR i := 0 TO 255 DO
  543. delimiters[i] := FALSE
  544. END;
  545. delim := "+-*/~&.,;|([{^=#<>:)]}";
  546. FOR i := 0 TO LEN(delim) - 2 DO
  547. delimiters[ORD(delim[i])] := TRUE
  548. END;
  549. enterKW("array", 0);
  550. enterKW("begin", 1);
  551. enterKW("by", 2);
  552. enterKW("case", 3);
  553. enterKW("const", 4);
  554. enterKW("div", 5);
  555. enterKW("do", 6);
  556. enterKW("else", 7);
  557. enterKW("elsif", 8);
  558. enterKW("end", 9);
  559. enterKW("false", 10);
  560. enterKW("for", 11);
  561. enterKW("if", 12);
  562. enterKW("import", 13);
  563. enterKW("in", 14);
  564. enterKW("is", 15);
  565. enterKW("mod", 16);
  566. enterKW("module", 17);
  567. enterKW("nil", 18);
  568. enterKW("of", 19);
  569. enterKW("or", 20);
  570. enterKW("pointer", 21);
  571. enterKW("procedure", 22);
  572. enterKW("record", 23);
  573. enterKW("repeat", 24);
  574. enterKW("return", 25);
  575. enterKW("then", 26);
  576. enterKW("to", 27);
  577. enterKW("true", 28);
  578. enterKW("type", 29);
  579. enterKW("until", 30);
  580. enterKW("var", 31);
  581. enterKW("while", 32)
  582. END init;
  583. PROCEDURE NewDef* (str: ARRAY OF CHAR);
  584. VAR
  585. item: DEF;
  586. BEGIN
  587. NEW(item);
  588. COPY(str, item.ident);
  589. LISTS.push(def, item)
  590. END NewDef;
  591. BEGIN
  592. def := LISTS.create(NIL);
  593. strings := LISTS.create(NIL)
  594. END SCAN.