SCAN.ob07 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779
  1. MODULE SCAN;
  2. IMPORT TXT := TEXTDRV,
  3. ARITH,
  4. S := STRINGS IN "./strings/STRINGS.ob07",
  5. ERRORS, LISTS;
  6. CONST
  7. NUMLEN = 256;
  8. IDLEN = 256;
  9. TEXTLEN = 512;
  10. lxUNDEF* = 0; lxIDENT* = 1; lxINTEGER* = 2; lxHEX* = 3;
  11. lxCHAR* = 4; lxFLOAT* = 5; lxSTRING* = 6; lxCOMMENT* = 7;
  12. lxEOF* = 8;
  13. lxPLUS* = 21; lxMINUS* = 22; lxMUL* = 23; lxSLASH* = 24;
  14. lxNOT* = 25; lxAND* = 26; lxPOINT* = 27; lxCOMMA* = 28;
  15. lxSEMI* = 29; lxBAR* = 30; lxLROUND* = 31; lxLSQUARE* = 32;
  16. lxLCURLY* = 33; lxCARET* = 34; lxEQ* = 35; lxNE* = 36;
  17. lxLT* = 37; lxGT* = 38; lxCOLON* = 39; lxRROUND* = 40;
  18. lxRSQUARE* = 41; lxRCURLY* = 42; lxLE* = 43; lxGE* = 44;
  19. lxASSIGN* = 45; lxRANGE* = 46;
  20. lxKW = 51;
  21. lxARRAY* = 51; lxBEGIN* = 52; lxBY* = 53; lxCASE* = 54;
  22. lxCONST* = 55; lxDIV* = 56; lxDO* = 57; lxELSE* = 58;
  23. lxELSIF* = 59; lxEND* = 60; lxFALSE* = 61; lxFOR* = 62;
  24. lxIF* = 63; lxIMPORT* = 64; lxIN* = 65; lxIS* = 66;
  25. lxMOD* = 67; lxMODULE* = 68; lxNIL* = 69; lxOF* = 70;
  26. lxOR* = 71; lxPOINTER* = 72; lxPROCEDURE* = 73; lxRECORD* = 74;
  27. lxREPEAT* = 75; lxRETURN* = 76; lxTHEN* = 77; lxTO* = 78;
  28. lxTRUE* = 79; lxTYPE* = 80; lxUNTIL* = 81; lxVAR* = 82;
  29. lxWHILE* = 83;
  30. lxERROR01* = -1; lxERROR02* = -2; lxERROR03* = -3; lxERROR04* = -4;
  31. lxERROR05* = -5; (*lxERROR06* = -6;*) lxERROR07* = -7; lxERROR08* = -8;
  32. lxERROR09* = -9; lxERROR10* = -10; lxERROR11* = -11; lxERROR12* = -12;
  33. lxERROR13* = -13;
  34. TYPE
  35. TEXTSTR* = ARRAY TEXTLEN OF CHAR;
  36. IDSTR* = ARRAY IDLEN OF CHAR;
  37. DEF = POINTER TO RECORD (LISTS.ITEM)
  38. ident: IDSTR
  39. END;
  40. STRING* = POINTER TO RECORD (LISTS.ITEM)
  41. s*: TEXTSTR;
  42. offset*, offsetW*, hash: INTEGER
  43. END;
  44. IDENT* = RECORD
  45. s*: IDSTR;
  46. hash*: INTEGER
  47. END;
  48. POSITION* = RECORD
  49. line*, col*: INTEGER
  50. END;
  51. LEX* = RECORD
  52. sym*: INTEGER;
  53. pos*: POSITION;
  54. ident*: IDENT;
  55. string*: STRING;
  56. value*: ARITH.VALUE;
  57. error*: INTEGER
  58. END;
  59. SCANNER* = TXT.TEXT;
  60. KEYWORD = ARRAY 10 OF CHAR;
  61. VAR
  62. delimiters: ARRAY 256 OF BOOLEAN;
  63. upto, LowerCase, _if: BOOLEAN;
  64. strings, def: LISTS.LIST;
  65. KW: ARRAY 33 OF RECORD upper, lower: KEYWORD; uhash, lhash: INTEGER END;
  66. PROCEDURE enterKW (s: KEYWORD; idx: INTEGER);
  67. BEGIN
  68. KW[idx].lower := s;
  69. KW[idx].upper := s;
  70. S.UpCase(KW[idx].upper);
  71. KW[idx].uhash := S.HashStr(KW[idx].upper);
  72. KW[idx].lhash := S.HashStr(KW[idx].lower);
  73. END enterKW;
  74. PROCEDURE checkKW (ident: IDENT): INTEGER;
  75. VAR
  76. i, res: INTEGER;
  77. BEGIN
  78. res := lxIDENT;
  79. i := 0;
  80. WHILE i < LEN(KW) DO
  81. IF (KW[i].uhash = ident.hash) & (KW[i].upper = ident.s)
  82. OR LowerCase & (KW[i].lhash = ident.hash) & (KW[i].lower = ident.s) THEN
  83. res := i + lxKW;
  84. i := LEN(KW)
  85. END;
  86. INC(i)
  87. END
  88. RETURN res
  89. END checkKW;
  90. PROCEDURE enterStr* (s: TEXTSTR): STRING;
  91. VAR
  92. str, res: STRING;
  93. hash: INTEGER;
  94. BEGIN
  95. hash := S.HashStr(s);
  96. str := strings.first(STRING);
  97. res := NIL;
  98. WHILE str # NIL DO
  99. IF (str.hash = hash) & (str.s = s) THEN
  100. res := str;
  101. str := NIL
  102. ELSE
  103. str := str.next(STRING)
  104. END
  105. END;
  106. IF res = NIL THEN
  107. NEW(res);
  108. res.s := s;
  109. res.offset := -1;
  110. res.offsetW := -1;
  111. res.hash := hash;
  112. LISTS.push(strings, res)
  113. END
  114. RETURN res
  115. END enterStr;
  116. PROCEDURE nextc (text: TXT.TEXT): CHAR;
  117. BEGIN
  118. TXT.next(text)
  119. RETURN text.peak
  120. END nextc;
  121. PROCEDURE setIdent* (VAR ident: IDENT; s: IDSTR);
  122. BEGIN
  123. ident.s := s;
  124. ident.hash := S.HashStr(s)
  125. END setIdent;
  126. PROCEDURE ident (text: TXT.TEXT; VAR lex: LEX);
  127. VAR
  128. c: CHAR;
  129. i: INTEGER;
  130. BEGIN
  131. c := text.peak;
  132. ASSERT(S.letter(c));
  133. i := 0;
  134. WHILE (i < IDLEN - 1) & (S.letter(c) OR S.digit(c)) DO
  135. lex.ident.s[i] := c;
  136. INC(i);
  137. c := nextc(text)
  138. END;
  139. lex.ident.s[i] := 0X;
  140. lex.ident.hash := S.HashStr(lex.ident.s);
  141. lex.sym := checkKW(lex.ident);
  142. IF S.letter(c) OR S.digit(c) THEN
  143. ERRORS.WarningMsg(lex.pos.line, lex.pos.col, 2);
  144. WHILE S.letter(c) OR S.digit(c) DO
  145. c := nextc(text)
  146. END
  147. END
  148. END ident;
  149. PROCEDURE number (text: TXT.TEXT; VAR lex: LEX);
  150. TYPE
  151. NUMSTR = ARRAY NUMLEN OF CHAR;
  152. VAR
  153. c: CHAR;
  154. hex: BOOLEAN;
  155. error, sym, i: INTEGER;
  156. num: NUMSTR;
  157. PROCEDURE push (VAR num: NUMSTR; VAR i: INTEGER; c: CHAR);
  158. BEGIN
  159. IF i < NUMLEN - 1 THEN
  160. num[i] := c;
  161. INC(i)
  162. END
  163. END push;
  164. BEGIN
  165. c := text.peak;
  166. ASSERT(S.digit(c));
  167. i := 0;
  168. error := 0;
  169. sym := lxINTEGER;
  170. hex := FALSE;
  171. WHILE S.digit(c) DO
  172. push(num, i, c);
  173. c := nextc(text)
  174. END;
  175. WHILE S.hexdigit(c) OR LowerCase & ("a" <= c) & (c <= "f") DO
  176. S.cap(c);
  177. push(num, i, c);
  178. c := nextc(text);
  179. hex := TRUE
  180. END;
  181. IF (c = "H") OR LowerCase & (c = "h") THEN
  182. push(num, i, c);
  183. TXT.next(text);
  184. sym := lxHEX
  185. ELSIF (c = "X") OR LowerCase & (c = "x") THEN
  186. push(num, i, c);
  187. TXT.next(text);
  188. sym := lxCHAR
  189. ELSIF c = "." THEN
  190. IF hex THEN
  191. sym := lxERROR01
  192. ELSE
  193. c := nextc(text);
  194. IF c # "." THEN
  195. push(num, i, ".");
  196. sym := lxFLOAT
  197. ELSE
  198. sym := lxINTEGER;
  199. text.peak := 7FX;
  200. upto := TRUE
  201. END;
  202. WHILE S.digit(c) DO
  203. push(num, i, c);
  204. c := nextc(text)
  205. END;
  206. IF (c = "E") OR LowerCase & (c = "e") THEN
  207. push(num, i, c);
  208. c := nextc(text);
  209. IF (c = "+") OR (c = "-") THEN
  210. push(num, i, c);
  211. c := nextc(text)
  212. END;
  213. IF S.digit(c) THEN
  214. WHILE S.digit(c) DO
  215. push(num, i, c);
  216. c := nextc(text)
  217. END
  218. ELSE
  219. sym := lxERROR02
  220. END
  221. END
  222. END
  223. ELSIF hex THEN
  224. sym := lxERROR01
  225. END;
  226. IF (i = NUMLEN - 1) & (sym >= 0) THEN
  227. sym := lxERROR07
  228. END;
  229. num[i] := 0X;
  230. IF sym = lxINTEGER THEN
  231. ARITH.iconv(num, lex.value, error)
  232. ELSIF (sym = lxHEX) OR (sym = lxCHAR) THEN
  233. ARITH.hconv(num, lex.value, error)
  234. ELSIF sym = lxFLOAT THEN
  235. ARITH.fconv(num, lex.value, error)
  236. END;
  237. CASE error OF
  238. |0:
  239. |1: sym := lxERROR08
  240. |2: sym := lxERROR09
  241. |3: sym := lxERROR10
  242. |4: sym := lxERROR11
  243. |5: sym := lxERROR12
  244. END;
  245. lex.sym := sym
  246. END number;
  247. PROCEDURE string (text: TXT.TEXT; VAR lex: LEX; quot: CHAR);
  248. VAR
  249. c: CHAR;
  250. i: INTEGER;
  251. str: TEXTSTR;
  252. BEGIN
  253. c := nextc(text);
  254. i := 0;
  255. WHILE (i < LEN(str) - 1) & (c # quot) & (c # 0X) & ~text.eol & ~text.eof DO
  256. str[i] := c;
  257. c := nextc(text);
  258. INC(i)
  259. END;
  260. str[i] := 0X;
  261. IF (i = LEN(str) - 1) & (c # quot) & (c # 0X) & ~text.eol & ~text.eof THEN
  262. lex.sym := lxERROR05
  263. END;
  264. IF c = quot THEN
  265. TXT.next(text);
  266. IF i # 1 THEN
  267. lex.sym := lxSTRING
  268. ELSE
  269. lex.sym := lxCHAR;
  270. ARITH.setChar(lex.value, ORD(str[0]))
  271. END
  272. ELSIF lex.sym # lxERROR05 THEN
  273. lex.sym := lxERROR03
  274. END;
  275. IF lex.sym = lxSTRING THEN
  276. lex.string := enterStr(str);
  277. lex.value.typ := ARITH.tSTRING;
  278. lex.value.string := lex.string
  279. END
  280. END string;
  281. PROCEDURE comment (text: TXT.TEXT);
  282. VAR
  283. c: CHAR;
  284. cond, depth: INTEGER;
  285. BEGIN
  286. cond := 0;
  287. depth := 1;
  288. REPEAT
  289. c := text.peak;
  290. TXT.next(text);
  291. IF c = "*" THEN
  292. IF cond = 1 THEN
  293. cond := 0;
  294. INC(depth)
  295. ELSE
  296. cond := 2
  297. END
  298. ELSIF c = ")" THEN
  299. IF cond = 2 THEN
  300. DEC(depth)
  301. END;
  302. cond := 0
  303. ELSIF c = "(" THEN
  304. cond := 1
  305. ELSE
  306. cond := 0
  307. END
  308. UNTIL (depth = 0) OR text.eof
  309. END comment;
  310. PROCEDURE delimiter (text: TXT.TEXT; c: CHAR): INTEGER;
  311. VAR
  312. sym: INTEGER;
  313. c0: CHAR;
  314. BEGIN
  315. c0 := c;
  316. c := nextc(text);
  317. CASE c0 OF
  318. |"+":
  319. sym := lxPLUS
  320. |"-":
  321. sym := lxMINUS
  322. |"*":
  323. sym := lxMUL
  324. |"/":
  325. sym := lxSLASH;
  326. IF c = "/" THEN
  327. sym := lxCOMMENT;
  328. REPEAT
  329. TXT.next(text)
  330. UNTIL text.eol OR text.eof
  331. END
  332. |"~":
  333. sym := lxNOT
  334. |"&":
  335. sym := lxAND
  336. |".":
  337. sym := lxPOINT;
  338. IF c = "." THEN
  339. sym := lxRANGE;
  340. TXT.next(text)
  341. END
  342. |",":
  343. sym := lxCOMMA
  344. |";":
  345. sym := lxSEMI
  346. |"|":
  347. sym := lxBAR
  348. |"(":
  349. sym := lxLROUND;
  350. IF c = "*" THEN
  351. sym := lxCOMMENT;
  352. TXT.next(text);
  353. comment(text)
  354. END
  355. |"[":
  356. sym := lxLSQUARE
  357. |"{":
  358. sym := lxLCURLY
  359. |"^":
  360. sym := lxCARET
  361. |"=":
  362. sym := lxEQ
  363. |"#":
  364. sym := lxNE
  365. |"<":
  366. sym := lxLT;
  367. IF c = "=" THEN
  368. sym := lxLE;
  369. TXT.next(text)
  370. END
  371. |">":
  372. sym := lxGT;
  373. IF c = "=" THEN
  374. sym := lxGE;
  375. TXT.next(text)
  376. END
  377. |":":
  378. sym := lxCOLON;
  379. IF c = "=" THEN
  380. sym := lxASSIGN;
  381. TXT.next(text)
  382. END
  383. |")":
  384. sym := lxRROUND
  385. |"]":
  386. sym := lxRSQUARE
  387. |"}":
  388. sym := lxRCURLY
  389. END
  390. RETURN sym
  391. END delimiter;
  392. PROCEDURE Next* (text: SCANNER; VAR lex: LEX);
  393. VAR
  394. c: CHAR;
  395. PROCEDURE check (cond: BOOLEAN; text: SCANNER; lex: LEX; errno: INTEGER);
  396. BEGIN
  397. IF ~cond THEN
  398. ERRORS.ErrorMsg(text.fname, lex.pos.line, lex.pos.col, errno)
  399. END
  400. END check;
  401. PROCEDURE IsDef (str: ARRAY OF CHAR): BOOLEAN;
  402. VAR
  403. cur: DEF;
  404. BEGIN
  405. cur := def.first(DEF);
  406. WHILE (cur # NIL) & (cur.ident # str) DO
  407. cur := cur.next(DEF)
  408. END
  409. RETURN cur # NIL
  410. END IsDef;
  411. PROCEDURE Skip (text: SCANNER);
  412. VAR
  413. i: INTEGER;
  414. BEGIN
  415. i := 0;
  416. WHILE (i <= text.ifc) & ~text._skip[i] DO
  417. INC(i)
  418. END;
  419. text.skip := i <= text.ifc
  420. END Skip;
  421. PROCEDURE prep_if (text: SCANNER; VAR lex: LEX);
  422. VAR
  423. skip: BOOLEAN;
  424. BEGIN
  425. INC(text.ifc);
  426. text._elsif[text.ifc] := lex.sym = lxELSIF;
  427. IF lex.sym = lxIF THEN
  428. INC(text.elsec);
  429. text._else[text.elsec] := FALSE
  430. END;
  431. _if := TRUE;
  432. skip := TRUE;
  433. text.skip := FALSE;
  434. Next(text, lex);
  435. check(lex.sym = lxLROUND, text, lex, 64);
  436. Next(text, lex);
  437. check(lex.sym = lxIDENT, text, lex, 22);
  438. REPEAT
  439. IF IsDef(lex.ident.s) THEN
  440. skip := FALSE
  441. END;
  442. Next(text, lex);
  443. IF lex.sym = lxBAR THEN
  444. Next(text, lex);
  445. check(lex.sym = lxIDENT, text, lex, 22)
  446. ELSE
  447. check(lex.sym = lxRROUND, text, lex, 33)
  448. END
  449. UNTIL lex.sym = lxRROUND;
  450. _if := FALSE;
  451. text._skip[text.ifc] := skip;
  452. Skip(text);
  453. Next(text, lex)
  454. END prep_if;
  455. PROCEDURE prep_end (text: SCANNER; VAR lex: LEX);
  456. BEGIN
  457. check(text.ifc > 0, text, lex, 118);
  458. IF lex.sym = lxEND THEN
  459. WHILE text._elsif[text.ifc] DO
  460. DEC(text.ifc)
  461. END;
  462. DEC(text.ifc);
  463. DEC(text.elsec)
  464. ELSIF (lex.sym = lxELSE) OR (lex.sym = lxELSIF) THEN
  465. check(~text._else[text.elsec], text, lex, 118);
  466. text._skip[text.ifc] := ~text._skip[text.ifc];
  467. text._else[text.elsec] := lex.sym = lxELSE
  468. END;
  469. Skip(text);
  470. IF lex.sym = lxELSIF THEN
  471. prep_if(text, lex)
  472. ELSE
  473. Next(text, lex)
  474. END
  475. END prep_end;
  476. BEGIN
  477. REPEAT
  478. c := text.peak;
  479. WHILE S.space(c) DO
  480. c := nextc(text)
  481. END;
  482. lex.pos.line := text.line;
  483. lex.pos.col := text.col;
  484. IF S.letter(c) THEN
  485. ident(text, lex)
  486. ELSIF S.digit(c) THEN
  487. number(text, lex)
  488. ELSIF (c = '"') OR (c = "'") THEN
  489. string(text, lex, c)
  490. ELSIF delimiters[ORD(c)] THEN
  491. lex.sym := delimiter(text, c)
  492. ELSIF c = "$" THEN
  493. IF S.letter(nextc(text)) THEN
  494. ident(text, lex);
  495. IF lex.sym = lxIF THEN
  496. IF ~_if THEN
  497. prep_if(text, lex)
  498. END
  499. ELSIF (lex.sym = lxEND) OR (lex.sym = lxELSE) OR (lex.sym = lxELSIF) THEN
  500. IF ~_if THEN
  501. prep_end(text, lex)
  502. END
  503. ELSE
  504. check(FALSE, text, lex, 119)
  505. END
  506. ELSE
  507. check(FALSE, text, lex, 119)
  508. END
  509. ELSIF c = 0X THEN
  510. lex.sym := lxEOF;
  511. text.skip := FALSE;
  512. IF text.eof THEN
  513. INC(lex.pos.col)
  514. END
  515. ELSIF (c = 7FX) & upto THEN
  516. upto := FALSE;
  517. lex.sym := lxRANGE;
  518. DEC(lex.pos.col);
  519. TXT.next(text)
  520. ELSE
  521. TXT.next(text);
  522. lex.sym := lxERROR04
  523. END;
  524. IF lex.sym < 0 THEN
  525. lex.error := -lex.sym
  526. ELSE
  527. lex.error := 0
  528. END
  529. UNTIL (lex.sym # lxCOMMENT) & ~text.skip
  530. END Next;
  531. PROCEDURE open* (name: ARRAY OF CHAR): SCANNER;
  532. RETURN TXT.open(name)
  533. END open;
  534. PROCEDURE close* (VAR scanner: SCANNER);
  535. BEGIN
  536. TXT.close(scanner)
  537. END close;
  538. PROCEDURE init* (lower: BOOLEAN);
  539. VAR
  540. i: INTEGER;
  541. delim: ARRAY 23 OF CHAR;
  542. BEGIN
  543. upto := FALSE;
  544. LowerCase := lower;
  545. FOR i := 0 TO 255 DO
  546. delimiters[i] := FALSE
  547. END;
  548. delim := "+-*/~&.,;|([{^=#<>:)]}";
  549. FOR i := 0 TO LEN(delim) - 2 DO
  550. delimiters[ORD(delim[i])] := TRUE
  551. END;
  552. enterKW("array", 0);
  553. enterKW("begin", 1);
  554. enterKW("by", 2);
  555. enterKW("case", 3);
  556. enterKW("const", 4);
  557. enterKW("div", 5);
  558. enterKW("do", 6);
  559. enterKW("else", 7);
  560. enterKW("elsif", 8);
  561. enterKW("end", 9);
  562. enterKW("false", 10);
  563. enterKW("for", 11);
  564. enterKW("if", 12);
  565. enterKW("import", 13);
  566. enterKW("in", 14);
  567. enterKW("is", 15);
  568. enterKW("mod", 16);
  569. enterKW("module", 17);
  570. enterKW("nil", 18);
  571. enterKW("of", 19);
  572. enterKW("or", 20);
  573. enterKW("pointer", 21);
  574. enterKW("procedure", 22);
  575. enterKW("record", 23);
  576. enterKW("repeat", 24);
  577. enterKW("return", 25);
  578. enterKW("then", 26);
  579. enterKW("to", 27);
  580. enterKW("true", 28);
  581. enterKW("type", 29);
  582. enterKW("until", 30);
  583. enterKW("var", 31);
  584. enterKW("while", 32)
  585. END init;
  586. PROCEDURE NewDef* (str: ARRAY OF CHAR);
  587. VAR
  588. item: DEF;
  589. BEGIN
  590. NEW(item);
  591. COPY(str, item.ident);
  592. LISTS.push(def, item)
  593. END NewDef;
  594. BEGIN
  595. def := LISTS.create(NIL);
  596. strings := LISTS.create(NIL)
  597. END SCAN.