1   // Copyright 2013 Bill Campbell, Swami Iyer and Bahar Akbal-Delibas
2   
3   package jminusminus;
4   
5   import java.io.FileNotFoundException;
6   import java.io.FileReader;
7   import java.io.IOException;
8   import java.io.LineNumberReader;
9   import java.util.Hashtable;
10  import static jminusminus.TokenKind.*;
11  
12  /**
13   * A lexical analyzer for j--, that has no backtracking mechanism.
14   * 
15   * When you add a new token to the scanner, you must also add an entry in the
16   * TokenKind enum in TokenInfo.java specifying the kind and image of the new
17   * token.
18   */
19  
20  class Scanner {
21  
22      /** End of file character. */
23      public final static char EOFCH = CharReader.EOFCH;
24  
25      /** Keywords in j--. */
26      private Hashtable<String, TokenKind> reserved;
27  
28      /** Source characters. */
29      private CharReader input;
30  
31      /** Next unscanned character. */
32      private char ch;
33  
34      /** Whether a scanner error has been found. */
35      private boolean isInError;
36  
37      /** Source file name. */
38      private String fileName;
39  
40      /** Line number of current token. */
41      private int line;
42  
43      /**
44       * Construct a Scanner object.
45       * 
46       * @param fileName
47       *            the name of the file containing the source.
48       * @exception FileNotFoundException
49       *                when the named file cannot be found.
50       */
51  
52      public Scanner(String fileName) throws FileNotFoundException {
53          this.input = new CharReader(fileName);
54          this.fileName = fileName;
55          isInError = false;
56  
57          // Keywords in j--
58          reserved = new Hashtable<String, TokenKind>();
59          reserved.put(ABSTRACT.image(), ABSTRACT);
60          reserved.put(BOOLEAN.image(), BOOLEAN);
61          reserved.put(CHAR.image(), CHAR);
62          reserved.put(CLASS.image(), CLASS);
63          reserved.put(ELSE.image(), ELSE);
64          reserved.put(EXTENDS.image(), EXTENDS);
65          reserved.put(FALSE.image(), FALSE);
66          reserved.put(IF.image(), IF);
67          reserved.put(IMPORT.image(), IMPORT);
68          reserved.put(INSTANCEOF.image(), INSTANCEOF);
69          reserved.put(INT.image(), INT);
70          reserved.put(NEW.image(), NEW);
71          reserved.put(NULL.image(), NULL);
72          reserved.put(PACKAGE.image(), PACKAGE);
73          reserved.put(PRIVATE.image(), PRIVATE);
74          reserved.put(PROTECTED.image(), PROTECTED);
75          reserved.put(PUBLIC.image(), PUBLIC);
76          reserved.put(RETURN.image(), RETURN);
77          reserved.put(STATIC.image(), STATIC);
78          reserved.put(SUPER.image(), SUPER);
79          reserved.put(THIS.image(), THIS);
80          reserved.put(TRUE.image(), TRUE);
81          reserved.put(VOID.image(), VOID);
82          reserved.put(WHILE.image(), WHILE);
83  
84          // Prime the pump.
85          nextCh();
86      }
87  
88      /**
89       * Scan the next token from input.
90       * 
91       * @return the the next scanned token.
92       */
93  
94      public TokenInfo getNextToken() {
95          StringBuffer buffer;
96          boolean moreWhiteSpace = true;
97          while (moreWhiteSpace) {
98              while (isWhitespace(ch)) {
99                  nextCh();
100             }
101             if (ch == '/') {
102                 nextCh();
103                 if (ch == '/') {
104                     // CharReader maps all new lines to '\n'
105                     while (ch != '\n' && ch != EOFCH) {
106                         nextCh();
107                     }
108                 } else {
109                     reportScannerError("Operator / is not supported in j--.");
110                 }
111             } else {
112                 moreWhiteSpace = false;
113             }
114         }
115         line = input.line();
116         switch (ch) {
117         case '(':
118             nextCh();
119             return new TokenInfo(LPAREN, line);
120         case ')':
121             nextCh();
122             return new TokenInfo(RPAREN, line);
123         case '{':
124             nextCh();
125             return new TokenInfo(LCURLY, line);
126         case '}':
127             nextCh();
128             return new TokenInfo(RCURLY, line);
129         case '[':
130             nextCh();
131             return new TokenInfo(LBRACK, line);
132         case ']':
133             nextCh();
134             return new TokenInfo(RBRACK, line);
135         case ';':
136             nextCh();
137             return new TokenInfo(SEMI, line);
138         case ',':
139             nextCh();
140             return new TokenInfo(COMMA, line);
141         case '=':
142             nextCh();
143             if (ch == '=') {
144                 nextCh();
145                 return new TokenInfo(EQUAL, line);
146             } else {
147                 return new TokenInfo(ASSIGN, line);
148             }
149         case '!':
150             nextCh();
151             return new TokenInfo(LNOT, line);
152         case '*':
153             nextCh();
154             return new TokenInfo(STAR, line);
155         case '+':
156             nextCh();
157             if (ch == '=') {
158                 nextCh();
159                 return new TokenInfo(PLUS_ASSIGN, line);
160             } else if (ch == '+') {
161                 nextCh();
162                 return new TokenInfo(INC, line);
163             } else {
164                 return new TokenInfo(PLUS, line);
165             }
166         case '-':
167             nextCh();
168             if (ch == '-') {
169                 nextCh();
170                 return new TokenInfo(DEC, line);
171             } else {
172                 return new TokenInfo(MINUS, line);
173             }
174         case '&':
175             nextCh();
176             if (ch == '&') {
177                 nextCh();
178                 return new TokenInfo(LAND, line);
179             } else {
180                 reportScannerError("Operator & is not supported in j--.");
181                 return getNextToken();
182             }
183         case '>':
184             nextCh();
185             return new TokenInfo(GT, line);
186         case '<':
187             nextCh();
188             if (ch == '=') {
189                 nextCh();
190                 return new TokenInfo(LE, line);
191             } else {
192                 reportScannerError("Operator < is not supported in j--.");
193                 return getNextToken();
194             }
195         case '\'':
196             buffer = new StringBuffer();
197             buffer.append('\'');
198             nextCh();
199             if (ch == '\\') {
200                 nextCh();
201                 buffer.append(escape());
202             } else {
203                 buffer.append(ch);
204                 nextCh();
205             }
206             if (ch == '\'') {
207                 buffer.append('\'');
208                 nextCh();
209                 return new TokenInfo(CHAR_LITERAL, buffer.toString(), line);
210             } else {
211                 // Expected a ' ; report error and try to
212                 // recover.
213                 reportScannerError(ch
214                         + " found by scanner where closing ' was expected.");
215                 while (ch != '\'' && ch != ';' && ch != '\n') {
216                     nextCh();
217                 }
218                 return new TokenInfo(CHAR_LITERAL, buffer.toString(), line);
219             }
220         case '"':
221             buffer = new StringBuffer();
222             buffer.append("\"");
223             nextCh();
224             while (ch != '"' && ch != '\n' && ch != EOFCH) {
225                 if (ch == '\\') {
226                     nextCh();
227                     buffer.append(escape());
228                 } else {
229                     buffer.append(ch);
230                     nextCh();
231                 }
232             }
233             if (ch == '\n') {
234                 reportScannerError("Unexpected end of line found in String");
235             } else if (ch == EOFCH) {
236                 reportScannerError("Unexpected end of file found in String");
237             } else {
238                 // Scan the closing "
239                 nextCh();
240                 buffer.append("\"");
241             }
242             return new TokenInfo(STRING_LITERAL, buffer.toString(), line);
243         case '.':
244             nextCh();
245             return new TokenInfo(DOT, line);
246         case EOFCH:
247             return new TokenInfo(EOF, line);
248         case '0':
249             // Handle only simple decimal integers for now.
250             nextCh();
251             return new TokenInfo(INT_LITERAL, "0", line);
252         case '1':
253         case '2':
254         case '3':
255         case '4':
256         case '5':
257         case '6':
258         case '7':
259         case '8':
260         case '9':
261             buffer = new StringBuffer();
262             while (isDigit(ch)) {
263                 buffer.append(ch);
264                 nextCh();
265             }
266             return new TokenInfo(INT_LITERAL, buffer.toString(), line);
267         default:
268             if (isIdentifierStart(ch)) {
269                 buffer = new StringBuffer();
270                 while (isIdentifierPart(ch)) {
271                     buffer.append(ch);
272                     nextCh();
273                 }
274                 String identifier = buffer.toString();
275                 if (reserved.containsKey(identifier)) {
276                     return new TokenInfo(reserved.get(identifier), line);
277                 } else {
278                     return new TokenInfo(IDENTIFIER, identifier, line);
279                 }
280             } else {
281                 reportScannerError("Unidentified input token: '%c'", ch);
282                 nextCh();
283                 return getNextToken();
284             }
285         }
286     }
287 
288     /**
289      * Scan and return an escaped character.
290      * 
291      * @return escaped character.
292      */
293 
294     private String escape() {
295         switch (ch) {
296         case 'b':
297             nextCh();
298             return "\\b";
299         case 't':
300             nextCh();
301             return "\\t";
302         case 'n':
303             nextCh();
304             return "\\n";
305         case 'f':
306             nextCh();
307             return "\\f";
308         case 'r':
309             nextCh();
310             return "\\r";
311         case '"':
312             nextCh();
313             return "\"";
314         case '\'':
315             nextCh();
316             return "\\'";
317         case '\\':
318             nextCh();
319             return "\\\\";
320         default:
321             reportScannerError("Badly formed escape: \\%c", ch);
322             nextCh();
323             return "";
324         }
325     }
326 
327     /**
328      * Advance ch to the next character from input, and update the line number.
329      */
330 
331     private void nextCh() {
332         line = input.line();
333         try {
334             ch = input.nextChar();
335         } catch (Exception e) {
336             reportScannerError("Unable to read characters from input");
337         }
338     }
339 
340     /**
341      * Report a lexcial error and record the fact that an error has occured.
342      * This fact can be ascertained from the Scanner by sending it an
343      * errorHasOccurred() message.
344      * 
345      * @param message
346      *            message identifying the error.
347      * @param args
348      *            related values.
349      */
350 
351     private void reportScannerError(String message, Object... args) {
352         isInError = true;
353         System.err.printf("%s:%d: ", fileName, line);
354         System.err.printf(message, args);
355         System.err.println();
356     }
357 
358     /**
359      * Return true if the specified character is a digit (0-9); false otherwise.
360      * 
361      * @param c
362      *            character.
363      * @return true or false.
364      */
365 
366     private boolean isDigit(char c) {
367         return (c >= '0' && c <= '9');
368     }
369 
370     /**
371      * Return true if the specified character is a whitespace; false otherwise.
372      * 
373      * @param c
374      *            character.
375      * @return true or false.
376      */
377 
378     private boolean isWhitespace(char c) {
379         switch (c) {
380         case ' ':
381         case '\t':
382         case '\n': // CharReader maps all new lines to '\n'
383         case '\f':
384             return true;
385         }
386         return false;
387     }
388 
389     /**
390      * Return true if the specified character can start an identifier name;
391      * false otherwise.
392      * 
393      * @param c
394      *            character.
395      * @return true or false.
396      */
397 
398     private boolean isIdentifierStart(char c) {
399         return (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c == '_' || c == '$');
400     }
401 
402     /**
403      * Return true if the specified character can be part of an identifier name;
404      * false otherwise.
405      * 
406      * @param c
407      *            character.
408      * @return true or false.
409      */
410 
411     private boolean isIdentifierPart(char c) {
412         return (isIdentifierStart(c) || isDigit(c));
413     }
414 
415     /**
416      * Has an error occurred up to now in lexical analysis?
417      * 
418      * @return true or false.
419      */
420 
421     public boolean errorHasOccurred() {
422         return isInError;
423     }
424 
425     /**
426      * The name of the source file.
427      * 
428      * @return name of the source file.
429      */
430 
431     public String fileName() {
432         return fileName;
433     }
434 
435 }
436 
437 /**
438  * A buffered character reader. Abstracts out differences between platforms,
439  * mapping all new lines to '\n'. Also, keeps track of line numbers where the
440  * first line is numbered 1.
441  */
442 
443 class CharReader {
444 
445     /** A representation of the end of file as a character. */
446     public final static char EOFCH = (char) -1;
447 
448     /** The underlying reader records line numbers. */
449     private LineNumberReader lineNumberReader;
450 
451     /** Name of the file that is being read. */
452     private String fileName;
453 
454     /**
455      * Construct a CharReader from a file name.
456      * 
457      * @param fileName
458      *            the name of the input file.
459      * @exception FileNotFoundException
460      *                if the file is not found.
461      */
462 
463     public CharReader(String fileName) throws FileNotFoundException {
464         lineNumberReader = new LineNumberReader(new FileReader(fileName));
465         this.fileName = fileName;
466     }
467 
468     /**
469      * Scan the next character.
470      * 
471      * @return the character scanned.
472      * @exception IOException
473      *                if an I/O error occurs.
474      */
475 
476     public char nextChar() throws IOException {
477         return (char) lineNumberReader.read();
478     }
479 
480     /**
481      * The current line number in the source file, starting at 1.
482      * 
483      * @return the current line number.
484      */
485 
486     public int line() {
487         // LineNumberReader counts lines from 0.
488         return lineNumberReader.getLineNumber() + 1;
489     }
490 
491     /**
492      * Return the file name.
493      * 
494      * @return the file name.
495      */
496 
497     public String fileName() {
498         return fileName;
499     }
500 
501     /**
502      * Close the file.
503      * 
504      * @exception IOException
505      *                if an I/O error occurs.
506      */
507 
508     public void close() throws IOException {
509         lineNumberReader.close();
510     }
511 
512 }
513