1
3 package jminusminus;
4
5 import java.io.FileNotFoundException;
6 import java.io.FileReader;
7 import java.io.IOException;
8 import java.io.LineNumberReader;
9 import java.util.Hashtable;
10
11 import static jminusminus.TokenKind.*;
12
13
16 class Scanner {
17 public final static char EOFCH = CharReader.EOFCH;
19
20 private Hashtable<String, TokenKind> reserved;
22
23 private CharReader input;
25
26 private char ch;
28
29 private boolean isInError;
31
32 private String fileName;
34
35 private int line;
37
38
44 public Scanner(String fileName) throws FileNotFoundException {
45 this.input = new CharReader(fileName);
46 this.fileName = fileName;
47 isInError = false;
48
49 reserved = new Hashtable<String, TokenKind>();
51 reserved.put(ABSTRACT.image(), ABSTRACT);
52 reserved.put(BOOLEAN.image(), BOOLEAN);
53 reserved.put(CHAR.image(), CHAR);
54 reserved.put(CLASS.image(), CLASS);
55 reserved.put(ELSE.image(), ELSE);
56 reserved.put(EXTENDS.image(), EXTENDS);
57 reserved.put(FALSE.image(), FALSE);
58 reserved.put(IF.image(), IF);
59 reserved.put(IMPORT.image(), IMPORT);
60 reserved.put(INSTANCEOF.image(), INSTANCEOF);
61 reserved.put(INT.image(), INT);
62 reserved.put(NEW.image(), NEW);
63 reserved.put(NULL.image(), NULL);
64 reserved.put(PACKAGE.image(), PACKAGE);
65 reserved.put(PRIVATE.image(), PRIVATE);
66 reserved.put(PROTECTED.image(), PROTECTED);
67 reserved.put(PUBLIC.image(), PUBLIC);
68 reserved.put(RETURN.image(), RETURN);
69 reserved.put(STATIC.image(), STATIC);
70 reserved.put(SUPER.image(), SUPER);
71 reserved.put(THIS.image(), THIS);
72 reserved.put(TRUE.image(), TRUE);
73 reserved.put(VOID.image(), VOID);
74 reserved.put(WHILE.image(), WHILE);
75
76 nextCh();
78 }
79
80
85 public TokenInfo getNextToken() {
86 StringBuffer buffer;
87 boolean moreWhiteSpace = true;
88 while (moreWhiteSpace) {
89 while (isWhitespace(ch)) {
90 nextCh();
91 }
92 if (ch == '/') {
93 nextCh();
94 if (ch == '/') {
95 while (ch != '\n' && ch != EOFCH) {
97 nextCh();
98 }
99 } else {
100 reportScannerError("Operator / is not supported in j--");
101 }
102 } else {
103 moreWhiteSpace = false;
104 }
105 }
106 line = input.line();
107 switch (ch) {
108 case ',':
109 nextCh();
110 return new TokenInfo(COMMA, line);
111 case '.':
112 nextCh();
113 return new TokenInfo(DOT, line);
114 case '[':
115 nextCh();
116 return new TokenInfo(LBRACK, line);
117 case '{':
118 nextCh();
119 return new TokenInfo(LCURLY, line);
120 case '(':
121 nextCh();
122 return new TokenInfo(LPAREN, line);
123 case ']':
124 nextCh();
125 return new TokenInfo(RBRACK, line);
126 case '}':
127 nextCh();
128 return new TokenInfo(RCURLY, line);
129 case ')':
130 nextCh();
131 return new TokenInfo(RPAREN, line);
132 case ';':
133 nextCh();
134 return new TokenInfo(SEMI, line);
135 case '*':
136 nextCh();
137 return new TokenInfo(STAR, line);
138 case '+':
139 nextCh();
140 if (ch == '=') {
141 nextCh();
142 return new TokenInfo(PLUS_ASSIGN, line);
143 } else if (ch == '+') {
144 nextCh();
145 return new TokenInfo(INC, line);
146 } else {
147 return new TokenInfo(PLUS, line);
148 }
149 case '-':
150 nextCh();
151 if (ch == '-') {
152 nextCh();
153 return new TokenInfo(DEC, line);
154 } else {
155 return new TokenInfo(MINUS, line);
156 }
157 case '=':
158 nextCh();
159 if (ch == '=') {
160 nextCh();
161 return new TokenInfo(EQUAL, line);
162 } else {
163 return new TokenInfo(ASSIGN, line);
164 }
165 case '>':
166 nextCh();
167 return new TokenInfo(GT, line);
168 case '<':
169 nextCh();
170 if (ch == '=') {
171 nextCh();
172 return new TokenInfo(LE, line);
173 } else {
174 reportScannerError("Operator < is not supported in j--");
175 return getNextToken();
176 }
177 case '!':
178 nextCh();
179 return new TokenInfo(LNOT, line);
180 case '&':
181 nextCh();
182 if (ch == '&') {
183 nextCh();
184 return new TokenInfo(LAND, line);
185 } else {
186 reportScannerError("Operator & is not supported in j--");
187 return getNextToken();
188 }
189 case '\'':
190 buffer = new StringBuffer();
191 buffer.append('\'');
192 nextCh();
193 if (ch == '\\') {
194 nextCh();
195 buffer.append(escape());
196 } else {
197 buffer.append(ch);
198 nextCh();
199 }
200 if (ch == '\'') {
201 buffer.append('\'');
202 nextCh();
203 return new TokenInfo(CHAR_LITERAL, buffer.toString(), line);
204 } else {
205 reportScannerError(ch + " found by scanner where closing ' was expected");
207 while (ch != '\'' && ch != ';' && ch != '\n') {
208 nextCh();
209 }
210 return new TokenInfo(CHAR_LITERAL, buffer.toString(), line);
211 }
212 case '"':
213 buffer = new StringBuffer();
214 buffer.append("\"");
215 nextCh();
216 while (ch != '"' && ch != '\n' && ch != EOFCH) {
217 if (ch == '\\') {
218 nextCh();
219 buffer.append(escape());
220 } else {
221 buffer.append(ch);
222 nextCh();
223 }
224 }
225 if (ch == '\n') {
226 reportScannerError("Unexpected end of line found in string");
227 } else if (ch == EOFCH) {
228 reportScannerError("Unexpected end of file found in string");
229 } else {
230 nextCh();
232 buffer.append("\"");
233 }
234 return new TokenInfo(STRING_LITERAL, buffer.toString(), line);
235 case EOFCH:
236 return new TokenInfo(EOF, line);
237 case '0':
238 case '1':
239 case '2':
240 case '3':
241 case '4':
242 case '5':
243 case '6':
244 case '7':
245 case '8':
246 case '9':
247 buffer = new StringBuffer();
248 while (isDigit(ch)) {
249 buffer.append(ch);
250 nextCh();
251 }
252 return new TokenInfo(INT_LITERAL, buffer.toString(), line);
253 default:
254 if (isIdentifierStart(ch)) {
255 buffer = new StringBuffer();
256 while (isIdentifierPart(ch)) {
257 buffer.append(ch);
258 nextCh();
259 }
260 String identifier = buffer.toString();
261 if (reserved.containsKey(identifier)) {
262 return new TokenInfo(reserved.get(identifier), line);
263 } else {
264 return new TokenInfo(IDENTIFIER, identifier, line);
265 }
266 } else {
267 reportScannerError("Unidentified input token: '%c'", ch);
268 nextCh();
269 return getNextToken();
270 }
271 }
272 }
273
274
279 public boolean errorHasOccurred() {
280 return isInError;
281 }
282
283
288 public String fileName() {
289 return fileName;
290 }
291
292 private String escape() {
294 switch (ch) {
295 case 'b':
296 nextCh();
297 return "\\b";
298 case 't':
299 nextCh();
300 return "\\t";
301 case 'n':
302 nextCh();
303 return "\\n";
304 case 'f':
305 nextCh();
306 return "\\f";
307 case 'r':
308 nextCh();
309 return "\\r";
310 case '"':
311 nextCh();
312 return "\\\"";
313 case '\'':
314 nextCh();
315 return "\\'";
316 case '\\':
317 nextCh();
318 return "\\\\";
319 default:
320 reportScannerError("Badly formed escape: \\%c", ch);
321 nextCh();
322 return "";
323 }
324 }
325
326 private void nextCh() {
328 line = input.line();
329 try {
330 ch = input.nextChar();
331 } catch (Exception e) {
332 reportScannerError("Unable to read characters from input");
333 }
334 }
335
336 private void reportScannerError(String message, Object... args) {
339 isInError = true;
340 System.err.printf("%s:%d: error: ", fileName, line);
341 System.err.printf(message, args);
342 System.err.println();
343 }
344
345 private boolean isDigit(char c) {
347 return (c >= '0' && c <= '9');
348 }
349
350 private boolean isWhitespace(char c) {
352 return (c == ' ' || c == '\t' || c == '\n' || c == '\f');
353 }
354
355 private boolean isIdentifierStart(char c) {
357 return (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c == '_' || c == '$');
358 }
359
360 private boolean isIdentifierPart(char c) {
363 return (isIdentifierStart(c) || isDigit(c));
364 }
365 }
366
367
/**
 * A character-at-a-time reader over a source file that also tracks the current line number.
 *
 * <p>Reading goes through a {@link LineNumberReader}, so every line terminator is delivered
 * as a single {@code '\n'}. The class implements {@link java.io.Closeable} so an instance can
 * be managed with try-with-resources.
 */
class CharReader implements java.io.Closeable {
    /** Sentinel returned by {@link #nextChar()} at end of input: {@code (char) -1}. */
    public final static char EOFCH = (char) -1;

    /** The underlying reader, which also does the line counting. */
    private final LineNumberReader lineNumberReader;

    /** Name of the file being read. */
    private final String fileName;

    /**
     * Opens the named file for reading.
     *
     * @param fileName path of the file to read.
     * @throws FileNotFoundException if the file cannot be opened for reading.
     */
    public CharReader(String fileName) throws FileNotFoundException {
        lineNumberReader = new LineNumberReader(new FileReader(fileName));
        this.fileName = fileName;
    }

    /**
     * Reads the next character from the file.
     *
     * @return the next character, or {@link #EOFCH} once the end of the file is reached.
     * @throws IOException if the underlying read fails.
     */
    public char nextChar() throws IOException {
        int c = lineNumberReader.read();
        // read() signals end of stream with -1; map it to the sentinel explicitly rather
        // than relying on the narrowing cast happening to produce the same value.
        return (c == -1) ? EOFCH : (char) c;
    }

    /**
     * Returns the current line number, counting from 1.
     *
     * @return the underlying reader's zero-based line number plus one.
     */
    public int line() {
        return lineNumberReader.getLineNumber() + 1;
    }

    /**
     * Returns the name of the file being read.
     *
     * @return the file name passed to the constructor.
     */
    public String fileName() {
        return fileName;
    }

    /**
     * Closes the underlying reader and releases the file.
     *
     * @throws IOException if closing the reader fails.
     */
    @Override
    public void close() throws IOException {
        lineNumberReader.close();
    }
}
429