1   // Copyright 2012- Bill Campbell, Swami Iyer and Bahar Akbal-Delibas
2   
3   package jminusminus;
4   
5   import java.io.FileNotFoundException;
6   import java.io.FileReader;
7   import java.io.IOException;
8   import java.io.LineNumberReader;
9   import java.util.Hashtable;
10  
11  import static jminusminus.TokenKind.*;
12  
13  /**
14   * A lexical analyzer for j--, that has no backtracking mechanism.
15   */
16  class Scanner {
17      // End of file character.
18      public final static char EOFCH = CharReader.EOFCH;
19  
20      // Keywords in j--.
21      private Hashtable<String, TokenKind> reserved;
22  
23      // Source characters.
24      private CharReader input;
25  
26      // Next unscanned character.
27      private char ch;
28  
29      // Whether a scanner error has been found.
30      private boolean isInError;
31  
32      // Source file name.
33      private String fileName;
34  
35      // Line number of current token.
36      private int line;
37  
38      /**
39       * Constructs a Scanner from a file name.
40       *
41       * @param fileName name of the source file.
42       * @throws FileNotFoundException when the named file cannot be found.
43       */
44      public Scanner(String fileName) throws FileNotFoundException {
45          this.input = new CharReader(fileName);
46          this.fileName = fileName;
47          isInError = false;
48  
49          // Keywords in j--
50          reserved = new Hashtable<String, TokenKind>();
51          reserved.put(ABSTRACT.image(), ABSTRACT);
52          reserved.put(BOOLEAN.image(), BOOLEAN);
53          reserved.put(CHAR.image(), CHAR);
54          reserved.put(CLASS.image(), CLASS);
55          reserved.put(ELSE.image(), ELSE);
56          reserved.put(EXTENDS.image(), EXTENDS);
57          reserved.put(FALSE.image(), FALSE);
58          reserved.put(IF.image(), IF);
59          reserved.put(IMPORT.image(), IMPORT);
60          reserved.put(INSTANCEOF.image(), INSTANCEOF);
61          reserved.put(INT.image(), INT);
62          reserved.put(NEW.image(), NEW);
63          reserved.put(NULL.image(), NULL);
64          reserved.put(PACKAGE.image(), PACKAGE);
65          reserved.put(PRIVATE.image(), PRIVATE);
66          reserved.put(PROTECTED.image(), PROTECTED);
67          reserved.put(PUBLIC.image(), PUBLIC);
68          reserved.put(RETURN.image(), RETURN);
69          reserved.put(STATIC.image(), STATIC);
70          reserved.put(SUPER.image(), SUPER);
71          reserved.put(THIS.image(), THIS);
72          reserved.put(TRUE.image(), TRUE);
73          reserved.put(VOID.image(), VOID);
74          reserved.put(WHILE.image(), WHILE);
75  
76          // Prime the pump.
77          nextCh();
78      }
79  
80      /**
81       * Scans and returns the next token from input.
82       *
83       * @return the next scanned token.
84       */
85      public TokenInfo getNextToken() {
86          StringBuffer buffer;
87          boolean moreWhiteSpace = true;
88          while (moreWhiteSpace) {
89              while (isWhitespace(ch)) {
90                  nextCh();
91              }
92              if (ch == '/') {
93                  nextCh();
94                  if (ch == '/') {
95                      // CharReader maps all new lines to '\n'.
96                      while (ch != '\n' && ch != EOFCH) {
97                          nextCh();
98                      }
99                  } else {
100                     reportScannerError("Operator / is not supported in j--");
101                 }
102             } else {
103                 moreWhiteSpace = false;
104             }
105         }
106         line = input.line();
107         switch (ch) {
108             case ',':
109                 nextCh();
110                 return new TokenInfo(COMMA, line);
111             case '.':
112                 nextCh();
113                 return new TokenInfo(DOT, line);
114             case '[':
115                 nextCh();
116                 return new TokenInfo(LBRACK, line);
117             case '{':
118                 nextCh();
119                 return new TokenInfo(LCURLY, line);
120             case '(':
121                 nextCh();
122                 return new TokenInfo(LPAREN, line);
123             case ']':
124                 nextCh();
125                 return new TokenInfo(RBRACK, line);
126             case '}':
127                 nextCh();
128                 return new TokenInfo(RCURLY, line);
129             case ')':
130                 nextCh();
131                 return new TokenInfo(RPAREN, line);
132             case ';':
133                 nextCh();
134                 return new TokenInfo(SEMI, line);
135             case '*':
136                 nextCh();
137                 return new TokenInfo(STAR, line);
138             case '+':
139                 nextCh();
140                 if (ch == '=') {
141                     nextCh();
142                     return new TokenInfo(PLUS_ASSIGN, line);
143                 } else if (ch == '+') {
144                     nextCh();
145                     return new TokenInfo(INC, line);
146                 } else {
147                     return new TokenInfo(PLUS, line);
148                 }
149             case '-':
150                 nextCh();
151                 if (ch == '-') {
152                     nextCh();
153                     return new TokenInfo(DEC, line);
154                 } else {
155                     return new TokenInfo(MINUS, line);
156                 }
157             case '=':
158                 nextCh();
159                 if (ch == '=') {
160                     nextCh();
161                     return new TokenInfo(EQUAL, line);
162                 } else {
163                     return new TokenInfo(ASSIGN, line);
164                 }
165             case '>':
166                 nextCh();
167                 return new TokenInfo(GT, line);
168             case '<':
169                 nextCh();
170                 if (ch == '=') {
171                     nextCh();
172                     return new TokenInfo(LE, line);
173                 } else {
174                     reportScannerError("Operator < is not supported in j--");
175                     return getNextToken();
176                 }
177             case '!':
178                 nextCh();
179                 return new TokenInfo(LNOT, line);
180             case '&':
181                 nextCh();
182                 if (ch == '&') {
183                     nextCh();
184                     return new TokenInfo(LAND, line);
185                 } else {
186                     reportScannerError("Operator & is not supported in j--");
187                     return getNextToken();
188                 }
189             case '\'':
190                 buffer = new StringBuffer();
191                 buffer.append('\'');
192                 nextCh();
193                 if (ch == '\\') {
194                     nextCh();
195                     buffer.append(escape());
196                 } else {
197                     buffer.append(ch);
198                     nextCh();
199                 }
200                 if (ch == '\'') {
201                     buffer.append('\'');
202                     nextCh();
203                     return new TokenInfo(CHAR_LITERAL, buffer.toString(), line);
204                 } else {
205                     // Expected a ' ; report error and try to recover.
206                     reportScannerError(ch + " found by scanner where closing ' was expected");
207                     while (ch != '\'' && ch != ';' && ch != '\n') {
208                         nextCh();
209                     }
210                     return new TokenInfo(CHAR_LITERAL, buffer.toString(), line);
211                 }
212             case '"':
213                 buffer = new StringBuffer();
214                 buffer.append("\"");
215                 nextCh();
216                 while (ch != '"' && ch != '\n' && ch != EOFCH) {
217                     if (ch == '\\') {
218                         nextCh();
219                         buffer.append(escape());
220                     } else {
221                         buffer.append(ch);
222                         nextCh();
223                     }
224                 }
225                 if (ch == '\n') {
226                     reportScannerError("Unexpected end of line found in string");
227                 } else if (ch == EOFCH) {
228                     reportScannerError("Unexpected end of file found in string");
229                 } else {
230                     // Scan the closing "
231                     nextCh();
232                     buffer.append("\"");
233                 }
234                 return new TokenInfo(STRING_LITERAL, buffer.toString(), line);
235             case EOFCH:
236                 return new TokenInfo(EOF, line);
237             case '0':
238             case '1':
239             case '2':
240             case '3':
241             case '4':
242             case '5':
243             case '6':
244             case '7':
245             case '8':
246             case '9':
247                 buffer = new StringBuffer();
248                 while (isDigit(ch)) {
249                     buffer.append(ch);
250                     nextCh();
251                 }
252                 return new TokenInfo(INT_LITERAL, buffer.toString(), line);
253             default:
254                 if (isIdentifierStart(ch)) {
255                     buffer = new StringBuffer();
256                     while (isIdentifierPart(ch)) {
257                         buffer.append(ch);
258                         nextCh();
259                     }
260                     String identifier = buffer.toString();
261                     if (reserved.containsKey(identifier)) {
262                         return new TokenInfo(reserved.get(identifier), line);
263                     } else {
264                         return new TokenInfo(IDENTIFIER, identifier, line);
265                     }
266                 } else {
267                     reportScannerError("Unidentified input token: '%c'", ch);
268                     nextCh();
269                     return getNextToken();
270                 }
271         }
272     }
273 
274     /**
275      * Returns true if an error has occurred, and false otherwise.
276      *
277      * @return true if an error has occurred, and false otherwise.
278      */
279     public boolean errorHasOccurred() {
280         return isInError;
281     }
282 
283     /**
284      * Returns the name of the source file.
285      *
286      * @return the name of the source file.
287      */
288     public String fileName() {
289         return fileName;
290     }
291 
292     // Scans and returns an escaped character.
293     private String escape() {
294         switch (ch) {
295             case 'b':
296                 nextCh();
297                 return "\\b";
298             case 't':
299                 nextCh();
300                 return "\\t";
301             case 'n':
302                 nextCh();
303                 return "\\n";
304             case 'f':
305                 nextCh();
306                 return "\\f";
307             case 'r':
308                 nextCh();
309                 return "\\r";
310             case '"':
311                 nextCh();
312                 return "\\\"";
313             case '\'':
314                 nextCh();
315                 return "\\'";
316             case '\\':
317                 nextCh();
318                 return "\\\\";
319             default:
320                 reportScannerError("Badly formed escape: \\%c", ch);
321                 nextCh();
322                 return "";
323         }
324     }
325 
326     // Advances ch to the next character from input, and updates the line number.
327     private void nextCh() {
328         line = input.line();
329         try {
330             ch = input.nextChar();
331         } catch (Exception e) {
332             reportScannerError("Unable to read characters from input");
333         }
334     }
335 
336     // Reports a lexical error and records the fact that an error has occurred. This fact can be
337     // ascertained from the Scanner by sending it an errorHasOccurred message.
338     private void reportScannerError(String message, Object... args) {
339         isInError = true;
340         System.err.printf("%s:%d: error: ", fileName, line);
341         System.err.printf(message, args);
342         System.err.println();
343     }
344 
345     // Returns true if the specified character is a digit (0-9), and false otherwise.
346     private boolean isDigit(char c) {
347         return (c >= '0' && c <= '9');
348     }
349 
350     // Returns true if the specified character is a whitespace, and false otherwise.
351     private boolean isWhitespace(char c) {
352         return (c == ' ' || c == '\t' || c == '\n' || c == '\f');
353     }
354 
355     // Returns true if the specified character can start an identifier name, and false otherwise.
356     private boolean isIdentifierStart(char c) {
357         return (c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c == '_' || c == '$');
358     }
359 
360     // Returns true if the specified character can be part of an identifier name, and false
361     // otherwise.
362     private boolean isIdentifierPart(char c) {
363         return (isIdentifierStart(c) || isDigit(c));
364     }
365 }
366 
367 /**
368  * A buffered character reader, which abstracts out differences between platforms, mapping all new
369  * lines to '\n', and also keeps track of line numbers.
370  */
class CharReader {
    // Sentinel character handed back once the input is exhausted.
    public final static char EOFCH = (char) -1;

    // Name of the file backing this reader.
    private String fileName;

    // Wrapped reader; it keeps the running line count.
    private LineNumberReader source;

    /**
     * Opens the named file for character-by-character reading.
     *
     * @param fileName the name of the input file.
     * @throws FileNotFoundException if the file is not found.
     */
    public CharReader(String fileName) throws FileNotFoundException {
        this.fileName = fileName;
        FileReader file = new FileReader(fileName);
        this.source = new LineNumberReader(file);
    }

    /**
     * Reads one character from the underlying reader and returns it. The int result of the
     * read is narrowed to a char, so the reader's end-of-input value (-1) comes back as
     * {@link #EOFCH}.
     *
     * @return the character scanned.
     * @throws IOException if an I/O error occurs.
     */
    public char nextChar() throws IOException {
        int code = source.read();
        return (char) code;
    }

    /**
     * Reports the line the reader is currently positioned on, counting from 1.
     *
     * @return the current line number in the source file.
     */
    public int line() {
        // The wrapped reader numbers lines from 0; shift to the 1-based form used in messages.
        int zeroBased = source.getLineNumber();
        return zeroBased + 1;
    }

    /**
     * Gives back the file name this reader was constructed with.
     *
     * @return the file name.
     */
    public String fileName() {
        return this.fileName;
    }

    /**
     * Closes the underlying reader.
     *
     * @throws IOException if an I/O error occurs.
     */
    public void close() throws IOException {
        source.close();
    }
}
429