本文使用 Java 的正则表达式功能,并在对 Tiny 源程序代码做了一定前提假设的情况下,完成了一个 Tiny 语言的词法分析程序,其实就是模仿教材中的最终输出文件,解析出每一个 Token。
默认每个 Token 之间都以空格“ ”隔开,因此可用 Java 中的正则表达式将每一行依此规律拆分为一个个 Token,然后再对每一个 Token 进行类别匹配(也用到正则表达式),最后按类别打印输出。但是,以下实现还不能够处理多行注释的问题。
对教材中的源程序样例代码修改如下(加了不少空格变成sample2.tny):
扫描之后的输出结果:
代码实现:
- package lexical_analysis;
- import java.io.BufferedReader;
- import java.io.FileReader;
- import java.util.regex.Pattern;
/**
 * Line-oriented scanner for TINY source files.
 *
 * <p>The scanner echoes every source line prefixed with its line number and then
 * prints one classified token per output line. Tokens are expected to be
 * separated by whitespace; in addition {@code ;} always terminates the token in
 * front of it, and <code>{ ... }</code> comments (which may span several lines)
 * are skipped.
 *
 * <p>An instance is single-use: the source file is opened by the constructor and
 * closed by {@link #scan()}.
 */
public class OriginalLexicalAnalyser {

    /** Reserved words; a token equal to one of these is never reported as an ID. */
    private static final String[] RESERVED_WORDS = {
        "read", "if", "then", "else", "repeat", "until", "write", "end"
    };

    /** Operator and bracket symbols that are printed verbatim. */
    private static final String[] ARITHMETIC_SYMBOLS = {
        "+", "-", "*", "/", "%", ":=", "=", "<", ">", "<=", ">=", "(", ")"
    };

    /** A number is one or more decimal digits. */
    private static final Pattern NUM_PATTERN = Pattern.compile("\\d+");

    /** An identifier is one or more ASCII letters. */
    private static final Pattern ID_PATTERN = Pattern.compile("[a-zA-Z]+");

    /** Reader over the source file; closed at the end of {@link #scan()}. */
    private final BufferedReader sourceFile;

    /** Number of the line currently being scanned (1-based once scanning starts). */
    private int lineCount = 0;

    /** True while the scanner is inside a <code>{ ... }</code> comment. */
    boolean commentFlag = false;

    /**
     * Opens the given source file for scanning.
     *
     * @param sourceFilePath path of the TINY source file
     * @throws Exception if the file cannot be opened
     */
    public OriginalLexicalAnalyser(String sourceFilePath) throws Exception {
        this.sourceFile = new BufferedReader(new FileReader(sourceFilePath));
    }

    /**
     * Scans the whole file, printing each source line followed by its tokens,
     * and finally an {@code EOF} marker numbered one past the last line.
     *
     * <p>The underlying reader is always closed when this method returns, so it
     * must not be called a second time on the same instance.
     *
     * @throws Exception if reading from or closing the source file fails
     */
    public void scan() throws Exception {
        try {
            String eachLine;
            while ((eachLine = this.sourceFile.readLine()) != null) {
                ++lineCount;
                System.out.printf("%2d: %s\n", lineCount, eachLine);
                scanLine(eachLine);
            }
            // EOF is reported once, after the last line, instead of on every
            // "end" keyword ("end" also closes an if-statement).
            System.out.printf("%2d: %s\n", lineCount + 1, "EOF");
        } finally {
            this.sourceFile.close();
        }
    }

    /**
     * Splits one source line into tokens and prints each of them.
     * Comment state is carried across lines through {@link #commentFlag}.
     *
     * @param line the source line without its line terminator
     */
    private void scanLine(String line) {
        int length = line.length();
        int tokenStart = 0;
        for (int pos = 0; pos < length; pos++) {
            char ch = line.charAt(pos);
            if (commentFlag) {
                // Inside a comment everything is skipped up to the closing brace.
                if (ch == '}') {
                    commentFlag = false;
                }
                tokenStart = pos + 1;
            } else if (ch == '{') {
                // Flush any token that directly precedes the comment opener.
                printToken(line.substring(tokenStart, pos));
                commentFlag = true;
                tokenStart = pos + 1;
            } else if (ch == ';') {
                printToken(line.substring(tokenStart, pos));
                printToken(";");
                tokenStart = pos + 1;
            } else if (Character.isWhitespace(ch)) {
                printToken(line.substring(tokenStart, pos));
                tokenStart = pos + 1;
            }
        }
        // Flush the last token of the line; it is not followed by a delimiter.
        printToken(line.substring(tokenStart, length));
    }

    /**
     * Prints one token together with the current line number and its category.
     * Empty tokens (produced by consecutive delimiters) are ignored; tokens that
     * match no category are reported as errors instead of being dropped.
     *
     * @param token the token text, possibly empty
     */
    private void printToken(String token) {
        if (token.isEmpty()) {
            return;
        }
        String prefix = " " + lineCount + ": ";
        if (isArithmeticSymbol(token) || ";".equals(token)) {
            System.out.println(prefix + token);
        } else if (isReservedWord(token)) {
            System.out.println(prefix + "reserved word: " + token);
        } else if (isID(token)) {
            System.out.println(prefix + "ID, name= " + token);
        } else if (isNum(token)) {
            System.out.println(prefix + "NUM, val= " + token);
        } else {
            System.out.println(prefix + "ERROR: " + token);
        }
    }

    /**
     * Tells whether the token is a reserved word.
     *
     * @param token the token text
     * @return {@code true} if the token equals one of the reserved words
     */
    private boolean isReservedWord(String token) {
        return contains(RESERVED_WORDS, token);
    }

    /**
     * Tells whether the token is one of the known operator/bracket symbols.
     *
     * @param token the token text
     * @return {@code true} if the token equals one of the symbols
     */
    private boolean isArithmeticSymbol(String token) {
        return contains(ARITHMETIC_SYMBOLS, token);
    }

    /**
     * Tells whether the token is a number, i.e. consists only of decimal digits.
     *
     * @param token the token text
     * @return {@code true} if the whole token is a digit sequence
     */
    private boolean isNum(String token) {
        return NUM_PATTERN.matcher(token).matches();
    }

    /**
     * Tells whether the token is an identifier, i.e. consists only of ASCII letters.
     *
     * @param token the token text
     * @return {@code true} if the whole token is a letter sequence
     */
    private boolean isID(String token) {
        return ID_PATTERN.matcher(token).matches();
    }

    /** Linear search for {@code token} in a small fixed table. */
    private static boolean contains(String[] table, String token) {
        for (String entry : table) {
            if (entry.equals(token)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Entry point of the lexical analyser.
     *
     * @param args optional: the path of the source file as first argument;
     *             defaults to {@code sample2.tny}
     * @throws Exception if the source file cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        String sourceFilePath = args.length > 0 ? args[0] : "sample2.tny";
        OriginalLexicalAnalyser lexicalAnalyser = new OriginalLexicalAnalyser(sourceFilePath);
        lexicalAnalyser.scan();
    }
}
本文转自 xxxx66yyyy 51CTO博客,原文链接:http://blog.51cto.com/haolloyin/530061,如需转载请自行联系原作者