import java.util.*; import java.util.logging.FileHandler; import java.*; import java.io.*; public class Scanner { private static final int EOF = 255; // This means the end of file private int sym; // The current token on the input, 0 = error token, 255 = end-of-file token public int val; // value of the last number encountered public int id; // id of the last identifier encountered private char curr_char; //This is meant to keep the current character read private boolean errorCondition = false; //To indicate the existance of error private boolean endSeen = false; private boolean ofSeen = false; private FileReader fileHandler; public ArrayList identifierList; public Scanner(String fileName) { identifierList = new ArrayList(); fileHandler = new FileReader(fileName); curr_char = fileHandler.GetSym(); Next(); }//End of constructor private void Next() { boolean StringStarted = false; /*I need this variable to be able to know the end of a string * or to make sure for example if I see a space that space * is just meant to divide a string to different tokens or it is * redundant space*/ String token = ""; //curr_char = fileHandler.GetSym(); if(curr_char == EOF) { sym = EOF; return; } while((curr_char != 0) && (curr_char != EOF)) { if(curr_char == ' ') { if(StringStarted) { stringTokenHandler(token); return; } curr_char = fileHandler.GetSym(); continue; } if(curr_char == '*') { if(StringStarted) { stringTokenHandler(token); return; } sym = 1; curr_char = fileHandler.GetSym(); return; } if(curr_char == '/') { if(StringStarted) { stringTokenHandler(token); return; } sym = 2; curr_char = fileHandler.GetSym(); return; } if(curr_char == '+') { if(StringStarted) { stringTokenHandler(token); return; } sym = 11; curr_char = fileHandler.GetSym(); return; } if(curr_char == '-') { if(StringStarted) { stringTokenHandler(token); return; } sym = 12; curr_char = fileHandler.GetSym(); return; } if(curr_char == '=') { if(StringStarted) { stringTokenHandler(token); return; } curr_char = fileHandler.GetSym(); if(curr_char != '=') { Error("A single = is not defined in the grammar"); return; } sym = 20; curr_char = fileHandler.GetSym(); return; } if(curr_char == '!') { if(StringStarted) { stringTokenHandler(token); return; } curr_char = fileHandler.GetSym(); if(curr_char != '=') { Error("A single ! is not defined in the grammar"); return; } sym = 21; curr_char = fileHandler.GetSym(); return; } if(curr_char == '<') { if(StringStarted) { stringTokenHandler(token); return; } curr_char = fileHandler.GetSym(); if(curr_char == '=') { sym = 24; curr_char = fileHandler.GetSym(); return; } if(curr_char == '-') { sym = 40; curr_char = fileHandler.GetSym(); return; } sym = 22; return; } if(curr_char == '>') { if(StringStarted) { stringTokenHandler(token); return; } curr_char = fileHandler.GetSym(); if(curr_char == '=') { sym = 23; curr_char = fileHandler.GetSym(); return; } sym = 25; return; } if(curr_char == '.') { if(StringStarted) { stringTokenHandler(token); return; } sym = 30; curr_char = fileHandler.GetSym(); return; } if(curr_char == ',') { if(StringStarted) { stringTokenHandler(token); return; } sym = 31; curr_char = fileHandler.GetSym(); return; } if(curr_char == ')') { if(StringStarted) { stringTokenHandler(token); return; } sym = 35; curr_char = fileHandler.GetSym(); return; } if(curr_char == ']') { if(StringStarted) { stringTokenHandler(token); return; } sym = 34; curr_char = fileHandler.GetSym(); return; } if(curr_char == '(') { if(StringStarted) { stringTokenHandler(token); return; } sym = 50; curr_char = fileHandler.GetSym(); return; } if(curr_char == '[') { if(StringStarted) { stringTokenHandler(token); return; } sym = 51; curr_char = fileHandler.GetSym(); return; } if(curr_char == ';') { if(StringStarted) { stringTokenHandler(token); return; } sym = 70; curr_char = fileHandler.GetSym(); return; } if(curr_char == '}') { if(StringStarted) { stringTokenHandler(token); return; } sym = 80; curr_char = fileHandler.GetSym(); return; } if(curr_char == '{') { if(StringStarted) { stringTokenHandler(token); return; } sym = 150; curr_char = fileHandler.GetSym(); return; } //The remaining cases are either string or numbers or reserved words StringStarted = true; token = token + curr_char; curr_char = fileHandler.GetSym(); }//End of while if(curr_char == 0) { sym = 0; return; } if(curr_char == EOF) { if(StringStarted) { stringTokenHandler(token); return; } sym = EOF; return; } }//End of Scanner Next private void stringTokenHandler(String token) { if(token.compareTo("end") == 0) { endSeen = true; Next(); if(sym == -5) { Next(); if(sym == -10) { sym = 255; endSeen = false; ofSeen = false; return; } Error("\"end of\" is a reserved string. It has to be followed by \"file\" and can not be used alone"); } Error("\"end\" is a reserved word. It has to be followed by \"of file\" and can not be used alone"); return; } if(token.compareTo("of") == 0) { if(!endSeen) { Error("\"of\" is a reserved word for \"end of file\" and can be used nowhere esle."); return; } ofSeen = true; sym = -5; /* Just to identify "of" so that it can later be used to * verify that "end", "of" and "file" have all been used. * Because otherwise it is a syntax error. */ return; } if(token.compareTo("file") == 0) { if(!ofSeen) { Error("\"file\" is a reserved word for \"end of file\" and can be used nowhere esle."); return; } sym = -10; /* Just to identify "file" so that it can later be used to * verify that "end", "of" and "file" have all been used. * Because otherwise it is a syntax error. */ return; } if(token.compareTo("then") == 0) { sym = 41; return; } if(token.compareTo("do") == 0) { sym = 42; return; } if(token.compareTo("od") == 0) { sym = 81; return; } if(token.compareTo("fi") == 0) { sym = 82; return; } if(token.compareTo("else") == 0) { sym = 90; return; } if(token.compareTo("let") == 0) { sym = 100; return; } if(token.compareTo("call") == 0) { sym = 101; return; } if(token.compareTo("if") == 0) { sym = 102; return; } if(token.compareTo("while") == 0) { sym = 103; return; } if(token.compareTo("return") == 0) { sym = 104; return; } if(token.compareTo("var") == 0) { sym = 110; return; } if(token.compareTo("function") == 0) { sym = 111; return; } if(token.compareTo("procedure") == 0) { sym = 112; return; } if(token.compareTo("array") == 0) { sym = 113; return; } if(token.compareTo("main") == 0) { sym = 200; return; } if(((token.charAt(0)) < 58) && ((token.charAt(0)) > 47)) { /* A number is expected to be seen here*/ int value = 0; int ten_coeff = 1; for(int i = token.length() - 1; i > -1; i--) { if (!((token.charAt(i) < 58) && (token.charAt(i) > 47))) { Error("This token is not a number nor it is an identifier!"); return; } value = value + ten_coeff*(token.charAt(i)-48); ten_coeff = 10*ten_coeff; } sym = 60; val = value; return; } if(((token.charAt(0) < 91) && (token.charAt(0) > 64)) || ((token.charAt(0) < 123) && (token.charAt(0) > 96))) { for(int i = 1; i < token.length(); i++) { if (!(((token.charAt(i) < 91) && (token.charAt(i) > 64)) || ((token.charAt(i) < 123) && (token.charAt(i) > 96)) || ((token.charAt(i) < 58) && (token.charAt(i) > 47)))) { Error("This token is expected to be an identifier but it contains non-permitted characters."); return; } } /* if we reach this point it means that an identifier has been * detected and now we have to insert the identifier to the list. * We should first check to see if the identifier is already inserted * in the list. If yes we set the id to that index. Otherwise we add * the new identifier to the list */ id = identifierList.indexOf(token); if(id == -1) { identifierList.add(token); id = identifierList.size() - 1; } sym = 61; return; } }//End of stringTokenHandler method public int GetSym() { System.out.println("scanner getsym is:" + sym); int temp = sym; if(sym == 255) return 255; Next(); return temp; }//End of GetSym method public void Error(String errorMsg) { System.out.println(errorMsg); fileHandler.Error(errorMsg); return; } /* identifier table methods */ /* This method is intended to provide the identifier * and as an input it accepts the index */ public String Id2String(int id) { return ((String)identifierList.get(id)); } /* This method is intended to provide the index of the identifier * in the List. It recieves the identifier as an input and outputs the index */ public int String2Id(String name) { return (identifierList.indexOf(name)); } } // end of class Scanner