编译原理：一个完整的编译器前端

一个完整的编译器前端

这个翻译器的 Java 代码由五个包组成:main、lexer、symbol、parser 和 inter。包 inter 中包含的类处理用抽象语法表示的语言结构。

A.1源语言

A.2 Main

程序的执行从类 Main 的方法 main 开始。方法 main 创建了一个词法分析器和一个语法分析器，然后调用语法分析器中的方法 program。

package main;

import java.io.*;
import lexer.*;
import parser.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-10:44
 * @description 主程序
 */

public class Main {

  public static void main(String[] args) throws IOException {
    Lexer lex = new Lexer();
    Parser parse = new Parser(lex);
    parse.program();
    System.out.write('\n');
  }
}
/*
输入源代码：
{
    int i;
    int j;
    float v;
    float x;
    float[100] a;

    while (true) {
    do i = i + 1; while (a[i] < v);
    do j = j - 1; while (a[j] > v);
    if (i >= j) break;
    x = a[i];
    a[i] = a[j];
    a[j] = x;
    }
  }
输出中间代码：
L1:L3:    i = i + 1
L5:    t1 = i * 8
    t2 = a[t1]
    if t2 < v goto L3
L4:    j = j - 1
L7:    t3 = j * 8
    t4 = a[t3]
    if t4 > v goto L4
L6:    iffalse i >= j goto L8
L9:    goto L2
L8:    t5 = i * 8
    x = a[t5]
L10:    t6 = i * 8
    t7 = j * 8
    t8 = a[t7]
    a[t6] = t8
L11:    t9 = j * 8
    a[t9] = x
    goto L1
L2:

中间代码转换c代码（这个没写）
#include <stdio.h>
#include <stdbool.h>

int main() {
    int i = 0;
    int j = 0;
    float v = 0.0;
    float x = 0.0;
    float a[100] = {0.0};

    while (true) {
        do {
            i = i + 1;
        } while (a[i] < v);

        do {
            j = j - 1;
        } while (a[j] > v);

        if (i >= j) {
            break;
        }

        x = a[i];
        a[i] = a[j];
        a[j] = x;
    }

    return 0;
}
编译
gcc -o main main.c
运行
./main

  */

A.3 词法分析器

包 lexer 是2.6.5 节中的词法分析器的代码的扩展。类 Tag 定义了各个词法单元对应的常量:

其中的三个常量 INDEX、MINUS 和 TEMP 不是词法单元,它们将在抽象语法树中使用。

package lexer;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-10:51
 * @description 词法单元对应的常量值，保留字
 */
public class Tag {

  public final static int
      AND = 256,
      BASIC = 257,
      BREAK = 258,
      DO = 259,
      ELSE = 260,
      EQ = 261,
      FALSE = 262,
      GE = 263,
      ID = 264,
      IF = 265,
      INDEX = 266,
      LE = 267,
      MINUS = 268,
      NE = 269,
      NUM = 270,
      OR = 271,
      REAL = 272,
      TEMP = 273,
      TRUE = 274,
      WHILE = 275;
}

package lexer;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-10:59
 * @description 标识符，词法单元
 */
public class Token {

  public final int tag;

  public Token(int t) {
    tag = t;
  }

  public String toString() {
    return "" + (char) tag;
  }
}

package lexer;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-11:02
 * @description 数字标识符
 */
public class Num extends Token{
  public final int value;

  public Num(int v) {
    super(Tag.NUM);
    value = v;
  }

  public String toString() {
    return "" + value;
  }
}

package lexer;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-11:08
 * @description 词素：用于管理保留字、标识符和像 && 这样的复合词法单元的词素。它也可以用来管理在中间代码中运算符的书写形式;比如单目减号。例如，源文本中的-2 的中间形式是minus 2。
 */
public class Word extends Token {
  public String lexeme = "";

  public Word(String s, int tag) {
    super(tag);
    lexeme = s;
  }

  public String toString() {
    return lexeme;
  }

  public static final Word
      and = new Word("&&", Tag.AND),
      or = new Word("||", Tag.OR),
      eq = new Word("==", Tag.EQ),
      ne = new Word("!=", Tag.NE),
      le = new Word("<=", Tag.LE),
      ge = new Word(">=", Tag.GE),
      minus = new Word("minus", Tag.MINUS),
      True = new Word("true", Tag.TRUE),
      False = new Word("false", Tag.FALSE),
      temp = new Word("t", Tag.TEMP);
}

package lexer;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-11:09
 * @description 用于处理浮点数
 */
public class Real extends Token {
  public final float value;

  public Real(float v) {
    super(Tag.REAL);
    value = v;
  }

  public String toString() {
    return "" + value;
  }
}

package lexer;

import java.io.*;
import java.util.*;
import symbols.*;


/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-10:46
 * @description 词法分析主类，函数scan扫描字符识别数字、标识符、和保留字，把字符串映射为字
 */
public class Lexer {
  public static int line = 1;
  char peek = ' ';
  Hashtable<String, Word> words = new Hashtable<>();

  void reserve(Word w) {
    words.put(w.lexeme, w);
  }

  public Lexer() {
    //保留了选定的关键字
    reserve(new Word("if", Tag.IF));
    reserve(new Word("else", Tag.ELSE));
    reserve(new Word("while", Tag.WHILE));
    reserve(new Word("do", Tag.DO));
    reserve(new Word("break", Tag.BREAK));
    //保留了在其他地方定义的对象的词素
    reserve(Word.True);
    reserve(Word.False);
    reserve(Type.Int);
    reserve(Type.Char);
    reserve(Type.Bool);
    reserve(Type.Float);
  }

  /**
   * 把下一个输入字符读取到变量peek中
   * @author  heliming
   * @date    2024/5/23-11:33
   * @throws IOException
   */
  void readch() throws IOException {
    peek = (char) System.in.read();
  }

  /**
   * 识别复合的词法单元例如：<=等符号
   * @author  heliming
   * @date    2024/5/23-11:39
   * @param c 读取下个字符
   * @return
   * @throws IOException
   */
  boolean readch(char c) throws IOException {
    readch();
    if (peek != c) return false;
    peek = ' ';
    return true;
  }

  /**
   * 读取字符
   * @author  heliming
   * @date    2024/5/23-11:40
   * @return 标识符或词素
   * @throws IOException
   */
  public Token scan() throws IOException {
    //循环读字符，不是空和下一行就继续往下执行
    for (;; readch()) {
      if (peek == ' ' || peek == '\t') continue;
      else if (peek == '\n') line = line + 1;
      else break;
    }
    //判断是否为复合词法单元
    switch (peek) {
      case '&':
        if (readch('&')) return Word.and;
        else return new Token('&');
      case '|':
        if (readch('|')) return Word.or;
        else return new Token('|');
      case '=':
        if (readch('=')) return Word.eq;
        else return new Token('=');
      case '!':
        if (readch('=')) return Word.ne;
        else return new Token('!');
      case '<':
        if (readch('=')) return Word.le;
        else return new Token('<');
      case '>':
        if (readch('=')) return Word.ge;
        else return new Token('>');
    }
    //是否为数字组合，例如：365 3.14
    if (Character.isDigit(peek)) {
      int v = 0;
      do {
        v = 10 * v + Character.digit(peek, 10);
        readch();
      } while (Character.isDigit(peek));

      if (peek != '.') return new Num(v);

      float x = v;
      float d = 10;
      for (;;) {
        readch();
        if (!Character.isDigit(peek)) break;
        x = x + Character.digit(peek, 10) / d;
        d = d * 10;
      }
      return new Real(x);
    }

    //字符串
    if (Character.isLetter(peek)) {
      StringBuffer b = new StringBuffer();
      do {
        b.append(peek);
        readch();
      } while (Character.isLetterOrDigit(peek));

      String s = b.toString();
      Word w = (Word) words.get(s);
      if (w != null) return w;
      w = new Word(s, Tag.ID);
      words.put(s, w);
      return w;
    }

    //词法单元返回
    Token tok = new Token(peek);
    peek = ' ';
    return tok;
  }

}

A.4 符号表和类型

package symbols;

import java.util.*;
import lexer.*;
import inter.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-13:29
 * @description 符号表，把字符串词法单元映射为类Id的对象
 */
public class Env {

  private Hashtable<Token, Id> table;
  protected Env prev;

  public Env(Env n) {
    table = new Hashtable<>();
    prev = n;
  }

  public void put(Token w, Id i) {
    table.put(w, i);
  }

  public Id get(Token w) {
    for (Env e = this; e != null; e = e.prev) {
      Id found = (Id) e.table.get(w);
      if (found != null) {
        return found;
      }
    }
    return null;
  }
}

package symbols;

import lexer.*;
/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-11:17
 * @description 基本类型名字保留字
 */

public class Type extends Word {
  public int width = 0;

  public Type(String s, int tag, int w) {
    super(s, tag);
    width = w; //width用于存储分配
  }

  public static final Type
      Int = new Type("int", Tag.BASIC, 4),
      Float = new Type("float", Tag.BASIC, 8),
      Char = new Type("char", Tag.BASIC, 1),
      Bool = new Type("bool", Tag.BASIC, 1);

  // numeric method to check if a type is numeric
  public static boolean numeric(Type p) {
    return p == Type.Char || p == Type.Int || p == Type.Float;
  }

  // max method to determine the maximum type for type conversion
  public static Type max(Type p1, Type p2) {
    if (!numeric(p1) || !numeric(p2)) return null;
    else if (p1 == Type.Float || p2 == Type.Float) return Type.Float;
    else if (p1 == Type.Int || p2 == Type.Int) return Type.Int;
    else return Type.Char;
  }
}

package symbols;
import lexer.*;
/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-13:45
 * @description 数组
 */

public class Array extends Type {
  public Type of; // Represents the type of elements in the array
  public int size = 1; // Represents the size of the array

  public Array(int sz, Type p) {
    super("[]", Tag.INDEX, sz * p.width); // Calling the constructor of the superclass 'Type'
    size = sz;
    of = p;
  }

  @Override
  public String toString() {
    return "[" + size + "]" + of.toString(); // Returning a string representation of the array
  }
}

A.5 表达式的中间代码

包 inter 包含了 Node 的类层次结构。Node 有两个子类:对应于表达式结点的 Expr 和对应干语句结点的 stmt。本节介绍 Expr 和它的子类。Expr 的某些方法处理布尔表达式和跳转代码，这些方法和 Expr 的其他子类将在 A.6 节中讨论。

package inter;

import lexer.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-13:50
 * @description 抽象语法树中的节点
 */

public class Node {

  int lexline = 0;

  Node() {
    lexline = Lexer.line;
  }

  void error(String s) {
    throw new Error("near line " + lexline + ": " + s);
  }

  static int labels = 0;

  public int newlabel() {
    return ++labels;
  }

  public void emitlabel(int i) {
    System.out.print("L" + i + ":");
  }

  public void emit(String s) {
    System.out.println("\t" + s);
  }
}

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-13:52
 * @description 表达式结点
 */

public class Expr extends Node {
  public Token op;//运算符
  public Type type;//类型

  public Expr(Token tok, Type p) {
    op = tok;
    type = p;
  }

  /**
   *
   * @author  heliming
   * @date    2024/5/23-14:01
   * @return 返回了一个“项”,该项可以成为一个三地址指令的右部。给定一个表达式E=E1+E2,方法 gen 返回一个项x1+x2,
   * 其中x1和x2分别是存放E1和E2值的地址。如果这个对象是一个地址，就可以返回 this 值。Expr 的子类通常会重新实现 gen。
   */
  public Expr gen() {
    return this;
  }

  /**
   *
   * @author  heliming
   * @date    2024/5/23-14:04
   * @return 把一个表达式计算(或者说“归约”)成为一个单一的地址。也就是说,它返回一个常量、一个标识符，
   * 或者一个临时名字。给定一个表达式E,方法 reduce 返回一个存放E的值的临时变量t。
   * 如果这个对象是一个地址,那么 this仍然是正确的返回值。
   */
  public Expr reduce() {
    return this;
  }

  /**
   * 为布尔表达式生成跳转代码，
   * 在逻辑运算中，如果条件为真，通常情况下不需要进行跳转，因为程序会继续执行下一条指令。
   * 而如果条件为假，才需要执行跳转操作，以改变程序的执行流程。
   * @author  heliming
   * @date    2024/5/23-14:05
   * @param t true的跳转
   * @param f false的跳转
   */
  public void jumping(int t, int f) {
    emitJumps(toString(), t, f);
  }

  public void emitJumps(String test, int t, int f) {
    if (t != 0 && f != 0) {
      emit("if " + test + " goto L" + t);
      emit("goto L" + f);
    } else if (t != 0) {
      emit("if " + test + " goto L" + t);
    } else if (f != 0) {
      emit("iffalse " + test + " goto L" + f);
    } else {
      // 不生成指令，因为t和f都直接穿越
    }
  }

  @Override
  public String toString() {
    return op.toString();
  }
}

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-13:31
 * @description 标识符的类Id
 */
public class Id extends Expr {

  // 相对地址
  public int offset;

  public Id(Word id, Type p, int b) {
    super(id, p);
    offset = b;
  }
}

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-14:47
 * @description op表达式，提供了reduce的实现
 */

public class Op extends Expr {

  public Op(Token tok, Type p) {
    super(tok, p);
  }

  /**
   * 会调用op的子类gen返回临时的名字
   * @return
   */
  public Expr reduce() {
    Expr x = gen();
    Temp t = new Temp(type);
    emit(t.toString() + " = " + x.toString());
    return t;
  }
}

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-14:55
 * @description 实现了双目运算符
 */

public class Arith extends Op {

  public Expr expr1, expr2;

  public Arith(Token tok, Expr x1, Expr x2) {
    super(tok, null);
    expr1 = x1;
    expr2 = x2;
    type = Type.max(expr1.type, expr2.type);
    if (type == null) {
      error("type error");
    }
  }

  /**
   * 把表达式规约为地址
   * @return
   */
  @Override
  public Expr gen() {
    return new Arith(op, expr1.reduce(), expr2.reduce());
  }

  @Override
  public String toString() {
    return expr1.toString() + " " + op.toString() + " "
        + expr2.toString();
  }
}

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-14:50
 * @description 表达式生成的临时名字
 */

public class Temp extends Expr {

  static int count = 0;
  int number;

  public Temp(Type p) {
    super(Word.temp, p);
    number = ++count;
  }

  @Override
  public String toString() {
    return "t" + number;
  }
}

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-15:07
 * @description 处理单目运算符
 */
public class Unary extends Op {

  public Expr expr;

  public Unary(Token tok, Expr x) { //处理单目减法，对!的处理见Not.
    super(tok, null);
    expr = x;
    type = Type.max(Type.Int, expr.type);
    if (type == null) {
      error("type error");
    }
  }

  @Override
  public Expr gen() {
    return new Unary(op, expr.reduce());
  }

  @Override
  public String toString() {
    return op.toString() + expr.toString();
  }
}

A.6 布尔表达式的跳转代码

布尔表达式B的跳转代码由方法 jumping 生成。这个方法的参数是两个标号t和f,它们分别称为表达式 B的 true 出口和false 出口。如果B的值为真,代码中就包含一个目标为t的跳转指令;如果B的值为假，就有一个目标为f的指令。按照惯例,特殊标号0表示控制流从B穿越，到达B的代码之后的下一个指令。

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-15:12
 * @description 布尔表达式
 */

public class Constant extends Expr {

  public Constant(Token tok, Type p) {
    super(tok, p);
  }

  public Constant(int i) {
    super(new Num(i), Type.Int);
  }

  // Constants for boolean true and false
  public static final Constant True = new Constant(Word.True, Type.Bool);
  public static final Constant False = new Constant(Word.False, Type.Bool);

  // Method for handling jumps
  public void jumping(int t, int f) {
    if (this == True && t != 0) {
      emit("goto L" + t);
    } else if (this == False && f != 0) {
      emit("goto L" + f);
    }
  }
}

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-15:23
 * @description 为Or、And、Not 提供一些常见功能
 */

public class Logical extends Expr {

  public Expr expr1, expr2;

  public Logical(Token tok, Expr x1, Expr x2) {
    super(tok, null);
    expr1 = x1;
    expr2 = x2;
    type = check(expr1.type, expr2.type);
    if (type == null) {
      error("type error");
    }
  }

  // Set type to null initially
  public Logical() {
    super(null, null);
  }

  public Type check(Type p1, Type p2) {
    if (p1 == Type.Bool && p2 == Type.Bool) {
      return Type.Bool;
    } else {
      return null;
    }
  }

  @Override
  public Expr gen() {
    int f = newlabel();
    int a = newlabel();
    Temp temp = new Temp(type);
    this.jumping(0, f);
    emit(temp.toString() + " = true");
    emit("goto L" + a);
    emitlabel(f);
    emit(temp.toString() + " = false");
    emitlabel(a);
    return temp;
  }

  @Override
  public String toString() {
    return expr1.toString() + " " + op.toString() + " " + expr2.toString();
  }
}

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-15:29
 * @description or运算符
 */

public class Or extends Logical {

  public Or(Token tok, Expr x1, Expr x2) {
    super(tok, x1, x2);
  }

  @Override
  public void jumping(int t, int f) {
    int label = (t != 0) ? t : newlabel();
    //如果 expr1 为真：
    //
    //expr1.jumping(label, 0); 会跳转到 label（如果 t != 0）或者在当前位置插入 label（如果 t == 0）。
    //因为 expr1 为真，逻辑 or 表达式的结果也是真，所以程序不会执行 expr2.jumping(t, f);。

    //如果 expr1 为假：
    //
    //expr1.jumping(label, 0); 的假条件跳转目标是 0，表示无需跳转，继续执行后面的代码。
    //expr2.jumping(t, f); 会执行，决定逻辑 or 表达式的最终结果。
    expr1.jumping(label, 0);
    expr2.jumping(t, f);
    //如果 t 等于 0，这意味着在 expr1.jumping(label, 0); 中生成了一个新的标签 label。
    //emitlabel(label); 在当前位置插入 label，确保程序执行到这里时，能够在适当位置标记这个标签。
    if (t == 0) {
      emitlabel(label);
    }
  }
}

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-15:50
 * @description and运算符
 */

public class And extends Logical {
  public And(Token tok, Expr x1, Expr x2) {
    super(tok, x1, x2);
  }

  @Override
  public void jumping(int t, int f) {
    //如果 f 不为 0，说明已经有一个假条件的跳转目标，因此将 label 设置为 f。
    //如果 f 为 0，即没有明确的假条件跳转目标，那么通过调用 newlabel() 生成一个新的标签，并将其赋值给 label。
    // 这样就确保了在需要插入假条件跳转目标时，有一个正确的标签可用。
    int label = (f != 0) ? f : newlabel();
    //这里将真条件设置为 0，表示不做跳转。
    //假条件设置为 label，即如果第一个表达式 expr1 的值为假，则跳转到 label。
    expr1.jumping(0, label);
    //这里传入的真条件和假条件与方法参数相同，保持了一致性。
    expr2.jumping(t, f);
    //如果传入的假条件 f 为 0，表示没有明确的假条件跳转目标。
    //在这种情况下，通过插入 label 标签，可以确保在需要跳转时有一个正确的标签可用，以确保程序的正确性。
    if (f == 0) {
      emitlabel(label);
    }
  }
}

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-16:50
 * @description not 单目运算符
 */

public class Not extends Logical {
  public Not(Token tok, Expr x2) {
    super(tok, x2, x2);
  }

  public void jumping(int t, int f) {
    expr2.jumping(f, t);
  }

  public String toString() {
    return op.toString() + " " + expr2.toString();
  }
}

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-16:59
 * @description 运算符<、<=、==、!=、>=、>
 */

public class Rel extends Logical {
  public Rel(Token tok, Expr x1, Expr x2) {
    super(tok, x1, x2);
  }

  public Type check(Type p1, Type p2) {
    if (p1 instanceof Array || p2 instanceof Array)
      return null;
    else if (p1 == p2)
      return Type.Bool;
    else
      return null;
  }

  public void jumping(int t, int f) {
    Expr a = expr1.reduce();
    Expr b = expr2.reduce();
    String test = a.toString() + " " + op.toString() + " " + b.toString();
    emitJumps(test, t, f);
  }
}

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-17:24
 * @description 支持数组中的布尔值表达式
 */

public class Access extends Op {
  public Id array;
  public Expr index;

  public Access(Id a, Expr i, Type p) {
    super(new Word("[]", Tag.INDEX), p);
    array = a;
    index = i;
  }

  public Expr gen() {
    return new Access(array, index.reduce(), type);
  }

  public void jumping(int t, int f) {
    emitJumps(reduce().toString(), t, f);
  }

  public String toString() {
    return array.toString() + "[" + index.toString() + "]";
  }
}

A.7 语句的中间代码

每个语句构造被实现为 stmt 的一个子类。一个构造的组成部分对应的字段是相应子类的对象。例如，如我们将看到的，类 while 有一个对应于测试表达式的字段和一个子语句字段。下面的类 stmt 的代码中构造函数Stmt()和 Stmt.Nu11处理抽象语法树的构造。构造函数 Stmt()不做任何事情，因为相关处理工作是在子类中完成的。静态对象 Stmt.Nu11(第4行)表示一个空的语句序列。

package inter;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-17:56
 * @description 语句构造模板，需要子类实现
 */

public class Stmt extends Node {

  public Stmt() {}

  public static Stmt Null = new Stmt();

  public void gen(int b, int a) {}//调用时的参数是语句开始处的标号和语句的下一条指令的标号

  int after = 0;// 保存语句的下一条指令的标号

  public static Stmt Enclosing = Stmt.Null;  // 用于break语句

}

package inter;

import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-18:06
 * @description if语句
 */

public class If extends Stmt {

  Expr expr;
  Stmt stmt;

  public If(Expr x, Stmt s) {
    expr = x;
    stmt = s;
    if (expr.type != Type.Bool)
      expr.error("boolean required in if");
  }

  public void gen(int b, int a) {
    int label = newlabel(); // stmt的代码的标号
    // 为真时控制流穿越，为假时转向a
    expr.jumping(0, a);
    emitlabel(label);
    //判断传入的S
    stmt.gen(label, a);
  }
}

package inter;

import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-20:48
 * @description else语句
 * 1. 生成标签：
 *
 * label1 和 label2 分别用于 if 和 else 分支的开始位置。
 * 2. 处理条件跳转：
 *
 * expr.jumping(0, label2);：生成条件跳转代码。如果条件表达式为假，控制流跳转到 label2，即 else 分支的位置。
 * 3. 生成 if 分支代码：
 *
 * emitlabel(label1);：在当前位置插入 label1 标签，表示 if 分支的开始。
 * stmt1.gen(label1, a);：调用 stmt1 的 gen 方法生成 if 分支的代码。
 * emit("goto L" + a);：生成无条件跳转，确保 if 分支执行完后，控制流直接跳转到 a，即整个 if-else 语句之后的位置。这一步非常重要，因为它避免了在 if 分支执行完之后继续执行 else 分支的代码。
 * 4. 生成 else 分支代码：
 *
 * emitlabel(label2);：在当前位置插入 label2 标签，表示 else 分支的开始。
 * stmt2.gen(label2, a);：调用 stmt2 的 gen 方法生成 else 分支的代码。因为 else 分支是最后一部分代码，所以执行完后自然会跳转到 a。
 */

public class Else extends Stmt {
  Expr expr;
  Stmt stmt1, stmt2;

  public Else(Expr x, Stmt s1, Stmt s2) {
    expr = x;
    stmt1 = s1;
    stmt2 = s2;
    if (expr.type != Type.Bool)
      expr.error("boolean required in if");
  }

  /**
   *
   * @param b
   * @param a 指示整个 if-else 结构结束后的继续位置。
   */
  public void gen(int b, int a) {
    int label1 = newlabel(); // label1用于语句stmt1 if
    int label2 = newlabel(); // label2用于语句stmt2 else

    expr.jumping(0, label2); //为真时控制流穿越到stmt1 // 为假时跳转到label2，即stmt2
    emitlabel(label1);//插入标签 label1
    stmt1.gen(label1, a); // 为真时执行stmt1
    emit("goto L" + a); // 执行完stmt1后跳转到a //插入无条件跳转到标签 a 的代码，确保 stmt1 执行完后跳转到 a。
    emitlabel(label2);//插入标签 label2
    //在 else 分支中，stmt2.gen(label2, a); 生成的代码会在 else 分支执行完后自然结束，并且顺序执行到 a 位置。
    // 如果再加上 emit("goto L" + a);，会多此一举。
    stmt2.gen(label2, a); // 为假时执行stmt2
  }
}

package inter;

import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-21:07
 * @description While语句
 */

public class While extends Stmt {
  Expr expr;
  Stmt stmt;

  public While() {
    expr = null;
    stmt = null;
  }

  public void init(Expr x, Stmt s) {
    expr = x;
    stmt = s;
    if (expr.type != Type.Bool)
      expr.error("boolean required in while");
  }

  public void gen(int b, int a) {
    after = a; // 保存标号 a
    expr.jumping(0, a); // 为假时跳转到 a
    int label = newlabel(); // 用于 stmt 的标号
    emitlabel(label);
    stmt.gen(label, b);
    emit("goto L" + b);
  }
}

package inter;

import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/24-9:26
 * @description do-while语句
 * 对比 Do 和 While
 * While 循环：先判断条件，然后执行循环体；如果条件为假，跳出循环。
 * Do-While 循环：先执行循环体，然后判断条件；如果条件为真，重新进入循环体，否则跳出循环。
 */

public class Do extends Stmt {
  Expr expr;
  Stmt stmt;

  public Do() {
    expr = null;
    stmt = null;
  }

  public void init(Stmt s, Expr x) {
    stmt = s;
    expr = x;
    if (expr.type != Type.Bool)
      expr.error("boolean required in do");
  }

  public void gen(int b, int a) {
    after = a; // 保存标号 a
    int label = newlabel(); // 用于 expr 的标号
    stmt.gen(b, label); // 生成循环体的代码，起始标签为 b，结束标签为 label
    emitlabel(label); // 插入 label 标签，表示条件判断的位置
    expr.jumping(b, 0); // 生成条件跳转代码，如果条件表达式为真，跳转到标签 b（即循环开始），否则跳出循环。
  }
}

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/24-9:32
 * @description 类 set 实现了左部为标识符且右部为一个表达式的赋值语句
 */

public class Set extends Stmt {
  public Id id; //表示赋值语句中的左值，即变量名
  public Expr expr; //表示赋值语句中的右值，即表达式

  public Set(Id i, Expr x) {
    id = i;
    expr = x;
    if (check(id.type, expr.type) == null)
      error("type error");
  }

  public Type check(Type p1, Type p2) {
    if (Type.numeric(p1) && Type.numeric(p2))
      return p2;
    else if (p1 == Type.Bool && p2 == Type.Bool)
      return p2;
    else
      return null;
  }

  /**
   * 生成三地址指令
   * @param b
   * @param a
   */
  public void gen(int b, int a) {
    emit(id.toString() + " = " + expr.gen().toString());
  }
}

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/24-9:37
 * @description 类SetElem 实现了对数组元素的赋值。
 */

public class SetElem extends Stmt {
  public Id array;//表示数组变量
  public Expr index;//表示数组的索引
  public Expr expr;//表示赋值语句中的右值表达式

  public SetElem(Access x, Expr y) {
    array = x.array;
    index = x.index;
    expr = y;
    if (check(x.type, expr.type) == null)
      error("type error");
  }

  public Type check(Type p1, Type p2) {
    if (p1 instanceof Array || p2 instanceof Array)
      return null;
    else if (p1 == p2)
      return p2;
    else if (Type.numeric(p1) && Type.numeric(p2))
      return p2;
    else
      return null;
  }

  public void gen(int b, int a) {
    String s1 = index.reduce().toString();
    String s2 = expr.reduce().toString();
    emit(array.toString() + "[" + s1 + "] = " + s2);
  }
}

package inter;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/24-9:43
 * @description 类 seq 实现了一个语句序列
 */

public class Seq extends Stmt {
  Stmt stmt1;
  Stmt stmt2;

  public Seq(Stmt s1, Stmt s2) {
    stmt1 = s1;
    stmt2 = s2;
  }

  /**
   * 如果第一个语句为空：直接生成第二个语句的代码。
   * 如果第二个语句为空：直接生成第一个语句的代码。
   * 如果两个语句都不为空：
   * 为第一个语句生成代码并在其后插入一个新的标签。
   * 输出标签，表示第一个语句执行完毕并转到下一个标签。
   * 生成第二个语句的代码，使用生成的标签作为起始点，并将控制流转移到结束标签 a。
   * @param b 开始
   * @param a 结束
   */
  public void gen(int b, int a) {
    if (stmt1 == Stmt.Null) {
      stmt2.gen(b, a);
    } else if (stmt2 == Stmt.Null) {
      stmt1.gen(b, a);
    } else {
      int label = newlabel();
      stmt1.gen(b, label);
      emitlabel(label);
      stmt2.gen(label, a);
    }
  }
}

package inter;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/24-9:51
 * @description 一个break 语句把控制流转出它的外围循环或外围switch 语句。
 */

public class Break extends Stmt {
  Stmt stmt;

  public Break() {
    if (Stmt.Enclosing == Stmt.Null)//如果 Enclosing 为 null，即没有包围的循环或 switch 语句，则报错。
      error("unenclosed break");
    stmt = Stmt.Enclosing;//将 Enclosing 赋值给 stmt，表示 break 语句所属的循环或 switch 语句。
  }

  public void gen(int b, int a) {
    emit("goto L" + stmt.after);
  }
}

A.8 语法分析器

语法分析器读人一个由词法单元组成的流，并调用适当的在A.5~A.7节中讨论的构造函数，构建出一棵抽象语法树。

package parser;

import java.io.*;
import lexer.*;
import symbols.*;
import inter.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-10:48
 * @description 语法分析器读人一个由词法单元组成的流，并调用适当的在A.5~A.7节中讨论的构造函数，构建出一棵抽象语法树。
 * 当前符号表按照2.7节中图 2-38 的翻译方案进行处理。
 */

public class Parser {
  private Lexer lex; //这个语法分析器的词法分析器
  private Token look; // 向前看词法单元
  Env top = null; // 当前或顶层的符号表
  int used = 0; // 用于变量声明的存储位置

  public Parser(Lexer l) throws IOException {
    lex = l;
    move();
  }

  void move() throws IOException {
    look = lex.scan();
  }

  void error(String s) {
    throw new Error("near line " + lex.line + ": " + s);
  }

  /**
   * 在语法分析过程中检查当前词法单元的标签是否符合预期，如果不符合则抛出语法错误，符合移动到下个词法单元。
   * @author  heliming
   * @date    2024/5/24-10:35
   * @param t
   * @throws IOException
   */
  void match(int t) throws IOException {
    if (look.tag == t) {
      move();
    } else {
      error("syntax error");
    }
  }

  /**
   * 构建抽象语法树，对整个程序进行语法分析，生成对应的代码，并在适当的位置插入标签以表示程序的开始和结束位置。
   * @author  heliming
   * @date    2024/5/24-10:47
   * @throws IOException
   */
  public void program() throws IOException {
    // program -> block
    Stmt s = block();
    int begin = s.newlabel();
    int after = s.newlabel();
    s.emitlabel(begin);
    s.gen(begin, after);
    s.emitlabel(after);
  }

  /**
   * 代码块分析，解析块语句，包括变量声明部分和语句部分，并在解析过程中维护符号表环境。
   * @author  heliming
   * @date    2024/5/24-10:46
   * @return
   * @throws IOException
   */
  Stmt block() throws IOException {  // block -> { decls stmts }
    match('{');
    Env savedEnv = top;
    top = new Env(top);
    decls();
    Stmt s = stmts();
    match('}');
    top = savedEnv;
    return s;
  }

  /**
   * 循环解析变量声明语句，将解析得到的标识符和类型信息存储到符号表中，并更新使用的位置。
   * 程序中的声明会被处理为符号表中有关标识符的条目。虽然这里没有显示，声明还可能生成在运行时刻为标识符保留存储空间的指令。
   * @author  heliming
   * @date    2024/5/24-10:59
   * @throws IOException
   */
  void decls() throws IOException {
    while (look.tag == Tag.BASIC) {  // D -> type ID ;
      Type p = type();
      Token tok = look;
      match(Tag.ID);
      match(';');
      Id id = new Id((Word) tok, p, used);
      top.put(tok, id);
      used = used + p.width;
    }
  }

  /**
   * 解析变量的类型，如果是基本类型则直接返回，如果是数组类型则调用 dims(p) 方法进行处理。
   * @author  heliming
   * @date    2024/5/24-11:06
   * @return
   * @throws IOException
   */
  Type type() throws IOException {
    Type p = (Type) look;//期望look.tag == Tag.BASIC
    match(Tag.BASIC);
    if (look.tag != '[') {
      return p; // T -> basic
    } else {
      return dims(p); // 返回数组类型
    }
  }

  /**
   * 解析数组类型，包括数组的维度和每个维度的大小，并返回一个表示数组类型的对象。
   * @author  heliming
   * @date    2024/5/24-11:09
   * @param p 数组类型
   * @return
   * @throws IOException
   */
  Type dims(Type p) throws IOException {
    match('[');
    Token tok = look;
    match(Tag.NUM);
    match(']');
    if (look.tag == '[') {
      p = dims(p);
    }
    return new Array(((Num) tok).value, p);
  }

  /**
   * 解析语句序列，直到遇到右大括号 } 结束，返回一个包含所有解析的语句的序列对象。
   * @author  heliming
   * @date    2024/5/24-11:11
   * @return
   * @throws IOException
   */
  Stmt stmts() throws IOException {
    //如果当前词法单元的标签是右大括号 }，表示语句序列结束，返回空语句 Stmt.Null。
    if (look.tag == '}') {
      return Stmt.Null;
    } else {
      //如果当前词法单元的标签不是右大括号 }，表示还有语句需要解析，调用 stmt() 方法解析当前语句，
      // 并递归调用 stmts() 方法解析下一个语句，然后返回一个新的序列语句对象 Seq，连接当前语句和下一个语句
      return new Seq(stmt(), stmts());
    }
  }

  /**
   * 非终结符号stmt的各个产生式 switch
   * @author  heliming
   * @date    2024/5/24-11:18
   * @return
   * @throws IOException
   */
  Stmt stmt() throws IOException {
    Expr x;
    Stmt s, s1, s2;
    Stmt savedStmt; // 用于为break语句保存外层的循环语句

    switch (look.tag) {
      case ';':
        move();
        return Stmt.Null;
      case Tag.IF:
        match(Tag.IF);
        match('(');
        x = bool();
        match(')');
        s1 = stmt();
        if (look.tag != Tag.ELSE) {
          return new If(x, s1);
        }
        match(Tag.ELSE);
        s2 = stmt();
        return new Else(x, s1, s2);
      case Tag.WHILE:
        While whilenode = new While();
        //保存当前的外层语句节点，这在处理嵌套循环时会很有用。
        savedStmt = Stmt.Enclosing;
        //将当前的 while 循环语句节点设置为外层语句节点，以便在内部语句中可以访问到外层的 while 循环节点。
        Stmt.Enclosing = whilenode;
        match(Tag.WHILE);
        match('(');
        x = bool();
        match(')');
        s1 = stmt();
        whilenode.init(x, s1);
        Stmt.Enclosing = savedStmt; // 重置Stmt.Enclosing 恢复之前保存的外层语句节点，保持语法树的正确结构。
        return whilenode;
      case Tag.DO:
        Do donode = new Do();
        savedStmt = Stmt.Enclosing;
        Stmt.Enclosing = donode;
        match(Tag.DO);
        s1 = stmt();
        match(Tag.WHILE);
        match('(');
        x = bool();
        match(')');
        match(';');
        donode.init(s1, x);
        Stmt.Enclosing = savedStmt; // 重置Stmt.Enclosing
        return donode;
      case Tag.BREAK:
        match(Tag.BREAK);
        match(';');
        return new Break();
      case '{':
        return block();
      default:
        return assign();
    }
  }

  /**
   * 赋值语句的语法解析
   * @author  heliming
   * @date    2024/5/24-13:30
   * @return
   * @throws IOException
   */
  Stmt assign() throws IOException {
    Stmt stmt;
    Token t = look;
    match(Tag.ID);
    Id id = top.get(t);
    if (id == null) error(t.toString() + " undeclared");
    if (look.tag == '=') { // S -> id = E
      move();
      stmt = new Set(id, bool());
    } else { // S -> L = E
      Access x = offset(id);
      match('=');
      stmt = new SetElem(x, bool());
    }
    match(';');
    return stmt;
  }

  /**
   * 解析布尔表达式
   * @author  heliming
   * @date    2024/5/24-13:01
   * @return
   * @throws IOException
   */
  Expr bool() throws IOException {
    Expr x = join();
    while (look.tag == Tag.OR) {
      Token tok = look;
      move();
      x = new Or(tok, x, join());
    }
    return x;
  }

  /**
   * 解析包含逻辑与操作 (AND) 的布尔表达式
   * @author  heliming
   * @date    2024/5/24-13:02
   * @return
   * @throws IOException
   */
  Expr join() throws IOException {
    Expr x = equality();
    while (look.tag == Tag.AND) {
      Token tok = look;
      move();
      x = new And(tok, x, equality());
    }
    return x;
  }

  /**
   * 解析相等性表达式，例如 == 和 != 运算符
   * @author  heliming
   * @date    2024/5/24-13:09
   * @return
   * @throws IOException
   */
  Expr equality() throws IOException {
    Expr x = rel();
    while (look.tag == Tag.EQ || look.tag == Tag.NE) {
      Token tok = look;
      move();
      x = new Rel(tok, x, rel());
    }
    return x;
  }

  /**
   * 解析包含关系运算符 (<, <=, >=, >) 的表达式
   * @author  heliming
   * @date    2024/5/24-13:03
   * @return
   * @throws IOException
   */
  Expr rel() throws IOException {
    Expr x = expr();
    switch (look.tag) {
      case '<':
      case Tag.LE:
      case Tag.GE:
      case '>':
        Token tok = look;
        move();
        return new Rel(tok, x, expr());
      default:
        return x;
    }
  }

  /**
   * 解析了包含加法和减法运算符的表达式
   * @author  heliming
   * @date    2024/5/24-13:05
   * @return
   * @throws IOException
   */
  Expr expr() throws IOException {
    Expr x = term();
    while (look.tag == '+' || look.tag == '-') {
      Token tok = look;
      move();
      x = new Arith(tok, x, term());
    }
    return x;
  }

  /**
   * 解析包含乘法 (*) 和除法 (/) 运算符的表达式
   * @author  heliming
   * @date    2024/5/24-13:05
   * @return
   * @throws IOException
   */
  Expr term() throws IOException {
    Expr x = unary();
    while (look.tag == '*' || look.tag == '/') {
      Token tok = look;
      move();
      x = new Arith(tok, x, unary());
    }
    return x;
  }

  /**
   * 解析一元运算符的表达式，例如负号（-）和逻辑非运算符（!）
   * @author  heliming
   * @date    2024/5/24-13:06
   * @return
   * @throws IOException
   */
  Expr unary() throws IOException {
    if (look.tag == '-') {
      move();
      return new Unary(Word.minus, unary());
    } else if (look.tag == '!') {
      Token tok = look;
      move();
      return new Not(tok, unary());
    } else {
      return factor();
    }
  }

  /**
   * 解析基本表达式，如括号内的表达式、常量和标识符
   * @author  heliming
   * @date    2024/5/24-13:08
   * @return
   * @throws IOException
   */
  Expr factor() throws IOException {
    Expr x = null;
    switch (look.tag) {
      case '(':
        move();
        x = bool();
        match(')');
        return x;
      case Tag.NUM:
        x = new Constant(look, Type.Int);
        move();
        return x;
      case Tag.REAL://如果标签是实数，则表示一个浮点常量
        x = new Constant(look, Type.Float);
        move();
        return x;
      case Tag.TRUE:
        x = Constant.True;
        move();
        return x;
      case Tag.FALSE:
        x = Constant.False;
        move();
        return x;
      case Tag.ID://如果标签是标识符，则可能是变量或数组访问。
        String s = look.toString();
        Id id = top.get(look);
        if (id == null) error(look.toString() + " undeclared");
        move();
        if (look.tag != '[') {
          return id;
        } else {
          return offset(id);
        }
      default:
        error("syntax error");
        return x;
    }
  }

  /**
   * 对数组元素的访问，支持多维数组
   * @author  heliming
   * @date    2024/5/24-13:14
   * @param a
   * @return
   * @throws IOException
   */
  Access offset(Id a) throws IOException {  // I -> [E] | [E] I
    Expr i;
    Expr w;
    Expr t1, t2;
    Expr loc; // 继承 id
    Type type = a.type; // 第一个下标 ，I -> [E]
    match('[');
    i = bool();
    match(']');
    type = ((Array) type).of;
    w = new Constant(type.width);
    t1 = new Arith(new Token('*'), i, w);
    loc = t1;

    // 多维下标，I -> [E] I
    while (look.tag == '[') {
      match('[');
      i = bool();
      match(']');
      type = ((Array) type).of;
      w = new Constant(type.width);
      t1 = new Arith(new Token('*'), i, w);
      t2 = new Arith(new Token('+'), loc, t1);
      loc = t2;
    }
    return new Access(a, loc, type);
  }

}

A.9 创建前端

javac lexer/*.java
javac symbols/*.java
javac inter/*.java
javac parser/*.java
javac main/*.java

上面的 javac 命令为每个类创建了.class 文件。要练习使用我们的翻译器，只需要输人java main.Main，后面跟上将要被翻译的源程序，比如文件 test 中的内容:

{
    int i;
    int j;
    float v;
    float x;
    float[100] a;

    while (true) {
    do i = i + 1; while (a[i] < v);
    do j = j - 1; while (a[j] > v);
    if (i >= j) break;
    x = a[i];
    a[i] = a[j];
    a[j] = x;
    }
  }

对于这个输人，这个前端输出:

L1:L3:    i = i + 1
L5:    t1 = i * 8
    t2 = a[t1]
    if t2 < v goto L3
L4:    j = j - 1
L7:    t3 = j * 8
    t4 = a[t3]
    if t4 > v goto L4
L6:    iffalse i >= j goto L8
L9:    goto L2
L8:    t5 = i * 8
    x = a[t5]
L10:    t6 = i * 8
    t7 = j * 8
    t8 = a[t7]
    a[t6] = t8
L11:    t9 = j * 8c
    a[t9] = x
    goto L1
L2:

如果中间代码变为c是这样的

#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>

int main() {
    int i = 0;
    int j = 0;
    float v = 0.0;
    float x = 0.0;
    float a[100] = {0.0};
    // 打印排序前的数组a的内容，额外加的方便看内容
    printf("排序前的数组a为：\n");
    int k; // 在循环外声明变量
    // 填充数组a的随机数
    for ( k = 0; k < 100; k++) {
        a[k] = (float)rand() / RAND_MAX * 100.0; // 生成0到100之间的随机数
    }
    j = 30;                 }
    v = 30;
    for ( k = 0; k < 100; k++) {
        printf("%.2f ", a[k]);
    }
    printf("\n");
    
    while (true) {
        do {
            i = i + 1;
        } while (a[i] < v);

        do {
            j = j - 1;
        } while (a[j] > v);

        if (i >= j) {
            break;
        }

        x = a[i];
        a[i] = a[j];
        a[j] = x;
    }
  // 打印排序后的数组a的内容，额外加的方便看内容
    printf("排序后的数组a为：\n");
    for ( k = 0; k < 100; k++) {
        printf("%.2f ", a[k]);
    }
    printf("\n");
    
    return 0;
}

编译
gcc -o main main.c
运行
./main

一个完整的编译器前端

这个翻译器的 Java 代码由五个包组成:main、lexer、symbol、parser 和 inter。包 inter 中包含的类处理用抽象语法表示的语言结构。

A.1源语言

A.2 Main

程序的执行从类 Main 的方法 main 开始。方法 main 创建了一个词法分析器和一个语法分析器，然后调用语法分析器中的方法 program。

package main;

import java.io.*;
import lexer.*;
import parser.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-10:44
 * @description 主程序
 */

public class Main {

  public static void main(String[] args) throws IOException {
    Lexer lex = new Lexer();
    Parser parse = new Parser(lex);
    parse.program();
    System.out.write('\n');
  }
}
/*
输入源代码：
{
    int i;
    int j;
    float v;
    float x;
    float[100] a;

    while (true) {
    do i = i + 1; while (a[i] < v);
    do j = j - 1; while (a[j] > v);
    if (i >= j) break;
    x = a[i];
    a[i] = a[j];
    a[j] = x;
    }
  }
输出中间代码：
L1:L3:    i = i + 1
L5:    t1 = i * 8
    t2 = a[t1]
    if t2 < v goto L3
L4:    j = j - 1
L7:    t3 = j * 8
    t4 = a[t3]
    if t4 > v goto L4
L6:    iffalse i >= j goto L8
L9:    goto L2
L8:    t5 = i * 8
    x = a[t5]
L10:    t6 = i * 8
    t7 = j * 8
    t8 = a[t7]
    a[t6] = t8
L11:    t9 = j * 8
    a[t9] = x
    goto L1
L2:

中间代码转换c代码（这个没写）
#include <stdio.h>
#include <stdbool.h>

int main() {
    int i = 0;
    int j = 0;
    float v = 0.0;
    float x = 0.0;
    float a[100] = {0.0};

    while (true) {
        do {
            i = i + 1;
        } while (a[i] < v);

        do {
            j = j - 1;
        } while (a[j] > v);

        if (i >= j) {
            break;
        }

        x = a[i];
        a[i] = a[j];
        a[j] = x;
    }

    return 0;
}
编译
gcc -o main main.c
运行
./main

  */

A.3 词法分析器

包 lexer 是2.6.5 节中的词法分析器的代码的扩展。类 Tag 定义了各个词法单元对应的常量:

其中的三个常量 INDEX、MINUS 和 TEMP 不是词法单元,它们将在抽象语法树中使用。

package lexer;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-10:51
 * @description 词法单元对应的常量值，保留字
 */
public class Tag {

  public final static int
      AND = 256,
      BASIC = 257,
      BREAK = 258,
      DO = 259,
      ELSE = 260,
      EQ = 261,
      FALSE = 262,
      GE = 263,
      ID = 264,
      IF = 265,
      INDEX = 266,
      LE = 267,
      MINUS = 268,
      NE = 269,
      NUM = 270,
      OR = 271,
      REAL = 272,
      TEMP = 273,
      TRUE = 274,
      WHILE = 275;
}

package lexer;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-10:59
 * @description 标识符，词法单元
 */
public class Token {

  public final int tag;

  public Token(int t) {
    tag = t;
  }

  public String toString() {
    return "" + (char) tag;
  }
}

package lexer;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-11:02
 * @description 数字标识符
 */
public class Num extends Token{
  public final int value;

  public Num(int v) {
    super(Tag.NUM);
    value = v;
  }

  public String toString() {
    return "" + value;
  }
}

package lexer;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-11:08
 * @description 词素：用于管理保留字、标识符和像 && 这样的复合词法单元的词素。它也可以用来管理在中间代码中运算符的书写形式;比如单目减号。例如，源文本中的-2 的中间形式是minus 2。
 */
public class Word extends Token {
  public String lexeme = "";

  public Word(String s, int tag) {
    super(tag);
    lexeme = s;
  }

  public String toString() {
    return lexeme;
  }

  public static final Word
      and = new Word("&&", Tag.AND),
      or = new Word("||", Tag.OR),
      eq = new Word("==", Tag.EQ),
      ne = new Word("!=", Tag.NE),
      le = new Word("<=", Tag.LE),
      ge = new Word(">=", Tag.GE),
      minus = new Word("minus", Tag.MINUS),
      True = new Word("true", Tag.TRUE),
      False = new Word("false", Tag.FALSE),
      temp = new Word("t", Tag.TEMP);
}

package lexer;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-11:09
 * @description 用于处理浮点数
 */
public class Real extends Token {
  public final float value;

  public Real(float v) {
    super(Tag.REAL);
    value = v;
  }

  public String toString() {
    return "" + value;
  }
}

package lexer;

import java.io.*;
import java.util.*;
import symbols.*;


/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-10:46
 * @description 词法分析主类，函数scan扫描字符识别数字、标识符、和保留字，把字符串映射为字
 */
public class Lexer {
  public static int line = 1;
  char peek = ' ';
  Hashtable<String, Word> words = new Hashtable<>();

  void reserve(Word w) {
    words.put(w.lexeme, w);
  }

  public Lexer() {
    //保留了选定的关键字
    reserve(new Word("if", Tag.IF));
    reserve(new Word("else", Tag.ELSE));
    reserve(new Word("while", Tag.WHILE));
    reserve(new Word("do", Tag.DO));
    reserve(new Word("break", Tag.BREAK));
    //保留了在其他地方定义的对象的词素
    reserve(Word.True);
    reserve(Word.False);
    reserve(Type.Int);
    reserve(Type.Char);
    reserve(Type.Bool);
    reserve(Type.Float);
  }

  /**
   * 把下一个输入字符读取到变量peek中
   * @author  heliming
   * @date    2024/5/23-11:33
   * @throws IOException
   */
  void readch() throws IOException {
    peek = (char) System.in.read();
  }

  /**
   * 识别复合的词法单元例如：<=等符号
   * @author  heliming
   * @date    2024/5/23-11:39
   * @param c 读取下个字符
   * @return
   * @throws IOException
   */
  boolean readch(char c) throws IOException {
    readch();
    if (peek != c) return false;
    peek = ' ';
    return true;
  }

  /**
   * 读取字符
   * @author  heliming
   * @date    2024/5/23-11:40
   * @return 标识符或词素
   * @throws IOException
   */
  public Token scan() throws IOException {
    //循环读字符，不是空和下一行就继续往下执行
    for (;; readch()) {
      if (peek == ' ' || peek == '\t') continue;
      else if (peek == '\n') line = line + 1;
      else break;
    }
    //判断是否为复合词法单元
    switch (peek) {
      case '&':
        if (readch('&')) return Word.and;
        else return new Token('&');
      case '|':
        if (readch('|')) return Word.or;
        else return new Token('|');
      case '=':
        if (readch('=')) return Word.eq;
        else return new Token('=');
      case '!':
        if (readch('=')) return Word.ne;
        else return new Token('!');
      case '<':
        if (readch('=')) return Word.le;
        else return new Token('<');
      case '>':
        if (readch('=')) return Word.ge;
        else return new Token('>');
    }
    //是否为数字组合，例如：365 3.14
    if (Character.isDigit(peek)) {
      int v = 0;
      do {
        v = 10 * v + Character.digit(peek, 10);
        readch();
      } while (Character.isDigit(peek));

      if (peek != '.') return new Num(v);

      float x = v;
      float d = 10;
      for (;;) {
        readch();
        if (!Character.isDigit(peek)) break;
        x = x + Character.digit(peek, 10) / d;
        d = d * 10;
      }
      return new Real(x);
    }

    //字符串
    if (Character.isLetter(peek)) {
      StringBuffer b = new StringBuffer();
      do {
        b.append(peek);
        readch();
      } while (Character.isLetterOrDigit(peek));

      String s = b.toString();
      Word w = (Word) words.get(s);
      if (w != null) return w;
      w = new Word(s, Tag.ID);
      words.put(s, w);
      return w;
    }

    //词法单元返回
    Token tok = new Token(peek);
    peek = ' ';
    return tok;
  }

}

A.4 符号表和类型

package symbols;

import java.util.*;
import lexer.*;
import inter.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-13:29
 * @description 符号表，把字符串词法单元映射为类Id的对象
 */
public class Env {

  private Hashtable<Token, Id> table;
  protected Env prev;

  public Env(Env n) {
    table = new Hashtable<>();
    prev = n;
  }

  public void put(Token w, Id i) {
    table.put(w, i);
  }

  public Id get(Token w) {
    for (Env e = this; e != null; e = e.prev) {
      Id found = (Id) e.table.get(w);
      if (found != null) {
        return found;
      }
    }
    return null;
  }
}

package symbols;

import lexer.*;
/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-11:17
 * @description 基本类型名字保留字
 */

public class Type extends Word {
  public int width = 0;

  public Type(String s, int tag, int w) {
    super(s, tag);
    width = w; //width用于存储分配
  }

  public static final Type
      Int = new Type("int", Tag.BASIC, 4),
      Float = new Type("float", Tag.BASIC, 8),
      Char = new Type("char", Tag.BASIC, 1),
      Bool = new Type("bool", Tag.BASIC, 1);

  // numeric method to check if a type is numeric
  public static boolean numeric(Type p) {
    return p == Type.Char || p == Type.Int || p == Type.Float;
  }

  // max method to determine the maximum type for type conversion
  public static Type max(Type p1, Type p2) {
    if (!numeric(p1) || !numeric(p2)) return null;
    else if (p1 == Type.Float || p2 == Type.Float) return Type.Float;
    else if (p1 == Type.Int || p2 == Type.Int) return Type.Int;
    else return Type.Char;
  }
}

package symbols;
import lexer.*;
/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-13:45
 * @description 数组
 */

public class Array extends Type {
  public Type of; // Represents the type of elements in the array
  public int size = 1; // Represents the size of the array

  public Array(int sz, Type p) {
    super("[]", Tag.INDEX, sz * p.width); // Calling the constructor of the superclass 'Type'
    size = sz;
    of = p;
  }

  @Override
  public String toString() {
    return "[" + size + "]" + of.toString(); // Returning a string representation of the array
  }
}

A.5 表达式的中间代码

package inter;

import lexer.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-13:50
 * @description 抽象语法树中的节点
 */

public class Node {

  int lexline = 0;

  Node() {
    lexline = Lexer.line;
  }

  void error(String s) {
    throw new Error("near line " + lexline + ": " + s);
  }

  static int labels = 0;

  public int newlabel() {
    return ++labels;
  }

  public void emitlabel(int i) {
    System.out.print("L" + i + ":");
  }

  public void emit(String s) {
    System.out.println("\t" + s);
  }
}

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-13:52
 * @description 表达式结点
 */

public class Expr extends Node {
  public Token op;//运算符
  public Type type;//类型

  public Expr(Token tok, Type p) {
    op = tok;
    type = p;
  }

  /**
   *
   * @author  heliming
   * @date    2024/5/23-14:01
   * @return 返回了一个“项”,该项可以成为一个三地址指令的右部。给定一个表达式E=E1+E2,方法 gen 返回一个项x1+x2,
   * 其中x1和x2分别是存放E1和E2值的地址。如果这个对象是一个地址，就可以返回 this 值。Expr 的子类通常会重新实现 gen。
   */
  public Expr gen() {
    return this;
  }

  /**
   *
   * @author  heliming
   * @date    2024/5/23-14:04
   * @return 把一个表达式计算(或者说“归约”)成为一个单一的地址。也就是说,它返回一个常量、一个标识符，
   * 或者一个临时名字。给定一个表达式E,方法 reduce 返回一个存放E的值的临时变量t。
   * 如果这个对象是一个地址,那么 this仍然是正确的返回值。
   */
  public Expr reduce() {
    return this;
  }

  /**
   * 为布尔表达式生成跳转代码，
   * 在逻辑运算中，如果条件为真，通常情况下不需要进行跳转，因为程序会继续执行下一条指令。
   * 而如果条件为假，才需要执行跳转操作，以改变程序的执行流程。
   * @author  heliming
   * @date    2024/5/23-14:05
   * @param t true的跳转
   * @param f false的跳转
   */
  public void jumping(int t, int f) {
    emitJumps(toString(), t, f);
  }

  public void emitJumps(String test, int t, int f) {
    if (t != 0 && f != 0) {
      emit("if " + test + " goto L" + t);
      emit("goto L" + f);
    } else if (t != 0) {
      emit("if " + test + " goto L" + t);
    } else if (f != 0) {
      emit("iffalse " + test + " goto L" + f);
    } else {
      // 不生成指令，因为t和f都直接穿越
    }
  }

  @Override
  public String toString() {
    return op.toString();
  }
}

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-13:31
 * @description 标识符的类Id
 */
public class Id extends Expr {

  // 相对地址
  public int offset;

  public Id(Word id, Type p, int b) {
    super(id, p);
    offset = b;
  }
}

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-14:47
 * @description op表达式，提供了reduce的实现
 */

public class Op extends Expr {

  public Op(Token tok, Type p) {
    super(tok, p);
  }

  /**
   * 会调用op的子类gen返回临时的名字
   * @return
   */
  public Expr reduce() {
    Expr x = gen();
    Temp t = new Temp(type);
    emit(t.toString() + " = " + x.toString());
    return t;
  }
}

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-14:55
 * @description 实现了双目运算符
 */

public class Arith extends Op {

  public Expr expr1, expr2;

  public Arith(Token tok, Expr x1, Expr x2) {
    super(tok, null);
    expr1 = x1;
    expr2 = x2;
    type = Type.max(expr1.type, expr2.type);
    if (type == null) {
      error("type error");
    }
  }

  /**
   * 把表达式规约为地址
   * @return
   */
  @Override
  public Expr gen() {
    return new Arith(op, expr1.reduce(), expr2.reduce());
  }

  @Override
  public String toString() {
    return expr1.toString() + " " + op.toString() + " "
        + expr2.toString();
  }
}

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-14:50
 * @description 表达式生成的临时名字
 */

public class Temp extends Expr {

  static int count = 0;
  int number;

  public Temp(Type p) {
    super(Word.temp, p);
    number = ++count;
  }

  @Override
  public String toString() {
    return "t" + number;
  }
}

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-15:07
 * @description 处理单目运算符
 */
public class Unary extends Op {

  public Expr expr;

  public Unary(Token tok, Expr x) { //处理单目减法，对!的处理见Not.
    super(tok, null);
    expr = x;
    type = Type.max(Type.Int, expr.type);
    if (type == null) {
      error("type error");
    }
  }

  @Override
  public Expr gen() {
    return new Unary(op, expr.reduce());
  }

  @Override
  public String toString() {
    return op.toString() + expr.toString();
  }
}

A.6 布尔表达式的跳转代码

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-15:12
 * @description 布尔表达式
 */

public class Constant extends Expr {

  public Constant(Token tok, Type p) {
    super(tok, p);
  }

  public Constant(int i) {
    super(new Num(i), Type.Int);
  }

  // Constants for boolean true and false
  public static final Constant True = new Constant(Word.True, Type.Bool);
  public static final Constant False = new Constant(Word.False, Type.Bool);

  // Method for handling jumps
  public void jumping(int t, int f) {
    if (this == True && t != 0) {
      emit("goto L" + t);
    } else if (this == False && f != 0) {
      emit("goto L" + f);
    }
  }
}

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-15:23
 * @description 为Or、And、Not 提供一些常见功能
 */

public class Logical extends Expr {

  public Expr expr1, expr2;

  public Logical(Token tok, Expr x1, Expr x2) {
    super(tok, null);
    expr1 = x1;
    expr2 = x2;
    type = check(expr1.type, expr2.type);
    if (type == null) {
      error("type error");
    }
  }

  // Set type to null initially
  public Logical() {
    super(null, null);
  }

  public Type check(Type p1, Type p2) {
    if (p1 == Type.Bool && p2 == Type.Bool) {
      return Type.Bool;
    } else {
      return null;
    }
  }

  @Override
  public Expr gen() {
    int f = newlabel();
    int a = newlabel();
    Temp temp = new Temp(type);
    this.jumping(0, f);
    emit(temp.toString() + " = true");
    emit("goto L" + a);
    emitlabel(f);
    emit(temp.toString() + " = false");
    emitlabel(a);
    return temp;
  }

  @Override
  public String toString() {
    return expr1.toString() + " " + op.toString() + " " + expr2.toString();
  }
}

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-15:29
 * @description or运算符
 */

public class Or extends Logical {

  public Or(Token tok, Expr x1, Expr x2) {
    super(tok, x1, x2);
  }

  @Override
  public void jumping(int t, int f) {
    int label = (t != 0) ? t : newlabel();
    //如果 expr1 为真：
    //
    //expr1.jumping(label, 0); 会跳转到 label（如果 t != 0）或者在当前位置插入 label（如果 t == 0）。
    //因为 expr1 为真，逻辑 or 表达式的结果也是真，所以程序不会执行 expr2.jumping(t, f);。

    //如果 expr1 为假：
    //
    //expr1.jumping(label, 0); 的假条件跳转目标是 0，表示无需跳转，继续执行后面的代码。
    //expr2.jumping(t, f); 会执行，决定逻辑 or 表达式的最终结果。
    expr1.jumping(label, 0);
    expr2.jumping(t, f);
    //如果 t 等于 0，这意味着在 expr1.jumping(label, 0); 中生成了一个新的标签 label。
    //emitlabel(label); 在当前位置插入 label，确保程序执行到这里时，能够在适当位置标记这个标签。
    if (t == 0) {
      emitlabel(label);
    }
  }
}

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-15:50
 * @description and运算符
 */

public class And extends Logical {
  public And(Token tok, Expr x1, Expr x2) {
    super(tok, x1, x2);
  }

  @Override
  public void jumping(int t, int f) {
    //如果 f 不为 0，说明已经有一个假条件的跳转目标，因此将 label 设置为 f。
    //如果 f 为 0，即没有明确的假条件跳转目标，那么通过调用 newlabel() 生成一个新的标签，并将其赋值给 label。
    // 这样就确保了在需要插入假条件跳转目标时，有一个正确的标签可用。
    int label = (f != 0) ? f : newlabel();
    //这里将真条件设置为 0，表示不做跳转。
    //假条件设置为 label，即如果第一个表达式 expr1 的值为假，则跳转到 label。
    expr1.jumping(0, label);
    //这里传入的真条件和假条件与方法参数相同，保持了一致性。
    expr2.jumping(t, f);
    //如果传入的假条件 f 为 0，表示没有明确的假条件跳转目标。
    //在这种情况下，通过插入 label 标签，可以确保在需要跳转时有一个正确的标签可用，以确保程序的正确性。
    if (f == 0) {
      emitlabel(label);
    }
  }
}

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-16:50
 * @description not 单目运算符
 */

public class Not extends Logical {
  public Not(Token tok, Expr x2) {
    super(tok, x2, x2);
  }

  public void jumping(int t, int f) {
    expr2.jumping(f, t);
  }

  public String toString() {
    return op.toString() + " " + expr2.toString();
  }
}

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-16:59
 * @description 运算符<、<=、==、!=、>=、>
 */

public class Rel extends Logical {
  public Rel(Token tok, Expr x1, Expr x2) {
    super(tok, x1, x2);
  }

  public Type check(Type p1, Type p2) {
    if (p1 instanceof Array || p2 instanceof Array)
      return null;
    else if (p1 == p2)
      return Type.Bool;
    else
      return null;
  }

  public void jumping(int t, int f) {
    Expr a = expr1.reduce();
    Expr b = expr2.reduce();
    String test = a.toString() + " " + op.toString() + " " + b.toString();
    emitJumps(test, t, f);
  }
}

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-17:24
 * @description 支持数组中的布尔值表达式
 */

public class Access extends Op {
  public Id array;
  public Expr index;

  public Access(Id a, Expr i, Type p) {
    super(new Word("[]", Tag.INDEX), p);
    array = a;
    index = i;
  }

  public Expr gen() {
    return new Access(array, index.reduce(), type);
  }

  public void jumping(int t, int f) {
    emitJumps(reduce().toString(), t, f);
  }

  public String toString() {
    return array.toString() + "[" + index.toString() + "]";
  }
}

A.7 语句的中间代码

package inter;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-17:56
 * @description 语句构造模板，需要子类实现
 */

public class Stmt extends Node {

  public Stmt() {}

  public static Stmt Null = new Stmt();

  public void gen(int b, int a) {}//调用时的参数是语句开始处的标号和语句的下一条指令的标号

  int after = 0;// 保存语句的下一条指令的标号

  public static Stmt Enclosing = Stmt.Null;  // 用于break语句

}

package inter;

import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-18:06
 * @description if语句
 */

public class If extends Stmt {

  Expr expr;
  Stmt stmt;

  public If(Expr x, Stmt s) {
    expr = x;
    stmt = s;
    if (expr.type != Type.Bool)
      expr.error("boolean required in if");
  }

  public void gen(int b, int a) {
    int label = newlabel(); // stmt的代码的标号
    // 为真时控制流穿越，为假时转向a
    expr.jumping(0, a);
    emitlabel(label);
    //判断传入的S
    stmt.gen(label, a);
  }
}

package inter;

import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-20:48
 * @description else语句
 * 1. 生成标签：
 *
 * label1 和 label2 分别用于 if 和 else 分支的开始位置。
 * 2. 处理条件跳转：
 *
 * expr.jumping(0, label2);：生成条件跳转代码。如果条件表达式为假，控制流跳转到 label2，即 else 分支的位置。
 * 3. 生成 if 分支代码：
 *
 * emitlabel(label1);：在当前位置插入 label1 标签，表示 if 分支的开始。
 * stmt1.gen(label1, a);：调用 stmt1 的 gen 方法生成 if 分支的代码。
 * emit("goto L" + a);：生成无条件跳转，确保 if 分支执行完后，控制流直接跳转到 a，即整个 if-else 语句之后的位置。这一步非常重要，因为它避免了在 if 分支执行完之后继续执行 else 分支的代码。
 * 4. 生成 else 分支代码：
 *
 * emitlabel(label2);：在当前位置插入 label2 标签，表示 else 分支的开始。
 * stmt2.gen(label2, a);：调用 stmt2 的 gen 方法生成 else 分支的代码。因为 else 分支是最后一部分代码，所以执行完后自然会跳转到 a。
 */

public class Else extends Stmt {
  Expr expr;
  Stmt stmt1, stmt2;

  public Else(Expr x, Stmt s1, Stmt s2) {
    expr = x;
    stmt1 = s1;
    stmt2 = s2;
    if (expr.type != Type.Bool)
      expr.error("boolean required in if");
  }

  /**
   *
   * @param b
   * @param a 指示整个 if-else 结构结束后的继续位置。
   */
  public void gen(int b, int a) {
    int label1 = newlabel(); // label1用于语句stmt1 if
    int label2 = newlabel(); // label2用于语句stmt2 else

    expr.jumping(0, label2); //为真时控制流穿越到stmt1 // 为假时跳转到label2，即stmt2
    emitlabel(label1);//插入标签 label1
    stmt1.gen(label1, a); // 为真时执行stmt1
    emit("goto L" + a); // 执行完stmt1后跳转到a //插入无条件跳转到标签 a 的代码，确保 stmt1 执行完后跳转到 a。
    emitlabel(label2);//插入标签 label2
    //在 else 分支中，stmt2.gen(label2, a); 生成的代码会在 else 分支执行完后自然结束，并且顺序执行到 a 位置。
    // 如果再加上 emit("goto L" + a);，会多此一举。
    stmt2.gen(label2, a); // 为假时执行stmt2
  }
}

package inter;

import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-21:07
 * @description While语句
 */

public class While extends Stmt {
  Expr expr;
  Stmt stmt;

  public While() {
    expr = null;
    stmt = null;
  }

  public void init(Expr x, Stmt s) {
    expr = x;
    stmt = s;
    if (expr.type != Type.Bool)
      expr.error("boolean required in while");
  }

  public void gen(int b, int a) {
    after = a; // 保存标号 a
    expr.jumping(0, a); // 为假时跳转到 a
    int label = newlabel(); // 用于 stmt 的标号
    emitlabel(label);
    stmt.gen(label, b);
    emit("goto L" + b);
  }
}

package inter;

import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/24-9:26
 * @description do-while语句
 * 对比 Do 和 While
 * While 循环：先判断条件，然后执行循环体；如果条件为假，跳出循环。
 * Do-While 循环：先执行循环体，然后判断条件；如果条件为真，重新进入循环体，否则跳出循环。
 */

public class Do extends Stmt {
  Expr expr;
  Stmt stmt;

  public Do() {
    expr = null;
    stmt = null;
  }

  public void init(Stmt s, Expr x) {
    stmt = s;
    expr = x;
    if (expr.type != Type.Bool)
      expr.error("boolean required in do");
  }

  public void gen(int b, int a) {
    after = a; // 保存标号 a
    int label = newlabel(); // 用于 expr 的标号
    stmt.gen(b, label); // 生成循环体的代码，起始标签为 b，结束标签为 label
    emitlabel(label); // 插入 label 标签，表示条件判断的位置
    expr.jumping(b, 0); // 生成条件跳转代码，如果条件表达式为真，跳转到标签 b（即循环开始），否则跳出循环。
  }
}

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/24-9:32
 * @description 类 set 实现了左部为标识符且右部为一个表达式的赋值语句
 */

public class Set extends Stmt {
  public Id id; //表示赋值语句中的左值，即变量名
  public Expr expr; //表示赋值语句中的右值，即表达式

  public Set(Id i, Expr x) {
    id = i;
    expr = x;
    if (check(id.type, expr.type) == null)
      error("type error");
  }

  public Type check(Type p1, Type p2) {
    if (Type.numeric(p1) && Type.numeric(p2))
      return p2;
    else if (p1 == Type.Bool && p2 == Type.Bool)
      return p2;
    else
      return null;
  }

  /**
   * 生成三地址指令
   * @param b
   * @param a
   */
  public void gen(int b, int a) {
    emit(id.toString() + " = " + expr.gen().toString());
  }
}

package inter;

import lexer.*;
import symbols.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/24-9:37
 * @description 类SetElem 实现了对数组元素的赋值。
 */

public class SetElem extends Stmt {
  public Id array;//表示数组变量
  public Expr index;//表示数组的索引
  public Expr expr;//表示赋值语句中的右值表达式

  public SetElem(Access x, Expr y) {
    array = x.array;
    index = x.index;
    expr = y;
    if (check(x.type, expr.type) == null)
      error("type error");
  }

  public Type check(Type p1, Type p2) {
    if (p1 instanceof Array || p2 instanceof Array)
      return null;
    else if (p1 == p2)
      return p2;
    else if (Type.numeric(p1) && Type.numeric(p2))
      return p2;
    else
      return null;
  }

  public void gen(int b, int a) {
    String s1 = index.reduce().toString();
    String s2 = expr.reduce().toString();
    emit(array.toString() + "[" + s1 + "] = " + s2);
  }
}

package inter;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/24-9:43
 * @description 类 seq 实现了一个语句序列
 */

public class Seq extends Stmt {
  Stmt stmt1;
  Stmt stmt2;

  public Seq(Stmt s1, Stmt s2) {
    stmt1 = s1;
    stmt2 = s2;
  }

  /**
   * 如果第一个语句为空：直接生成第二个语句的代码。
   * 如果第二个语句为空：直接生成第一个语句的代码。
   * 如果两个语句都不为空：
   * 为第一个语句生成代码并在其后插入一个新的标签。
   * 输出标签，表示第一个语句执行完毕并转到下一个标签。
   * 生成第二个语句的代码，使用生成的标签作为起始点，并将控制流转移到结束标签 a。
   * @param b 开始
   * @param a 结束
   */
  public void gen(int b, int a) {
    if (stmt1 == Stmt.Null) {
      stmt2.gen(b, a);
    } else if (stmt2 == Stmt.Null) {
      stmt1.gen(b, a);
    } else {
      int label = newlabel();
      stmt1.gen(b, label);
      emitlabel(label);
      stmt2.gen(label, a);
    }
  }
}

package inter;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/24-9:51
 * @description 一个break 语句把控制流转出它的外围循环或外围switch 语句。
 */

public class Break extends Stmt {
  Stmt stmt;

  public Break() {
    if (Stmt.Enclosing == Stmt.Null)//如果 Enclosing 为 null，即没有包围的循环或 switch 语句，则报错。
      error("unenclosed break");
    stmt = Stmt.Enclosing;//将 Enclosing 赋值给 stmt，表示 break 语句所属的循环或 switch 语句。
  }

  public void gen(int b, int a) {
    emit("goto L" + stmt.after);
  }
}

A.8 语法分析器

语法分析器读人一个由词法单元组成的流，并调用适当的在A.5~A.7节中讨论的构造函数，构建出一棵抽象语法树。

package parser;

import java.io.*;
import lexer.*;
import symbols.*;
import inter.*;

/**
 * @author heliming
 * @version 1.0
 * @date 2024/5/23-10:48
 * @description 语法分析器读人一个由词法单元组成的流，并调用适当的在A.5~A.7节中讨论的构造函数，构建出一棵抽象语法树。
 * 当前符号表按照2.7节中图 2-38 的翻译方案进行处理。
 */

public class Parser {
  private Lexer lex; //这个语法分析器的词法分析器
  private Token look; // 向前看词法单元
  Env top = null; // 当前或顶层的符号表
  int used = 0; // 用于变量声明的存储位置

  public Parser(Lexer l) throws IOException {
    lex = l;
    move();
  }

  void move() throws IOException {
    look = lex.scan();
  }

  void error(String s) {
    throw new Error("near line " + lex.line + ": " + s);
  }

  /**
   * 在语法分析过程中检查当前词法单元的标签是否符合预期，如果不符合则抛出语法错误，符合移动到下个词法单元。
   * @author  heliming
   * @date    2024/5/24-10:35
   * @param t
   * @throws IOException
   */
  void match(int t) throws IOException {
    if (look.tag == t) {
      move();
    } else {
      error("syntax error");
    }
  }

  /**
   * 构建抽象语法树，对整个程序进行语法分析，生成对应的代码，并在适当的位置插入标签以表示程序的开始和结束位置。
   * @author  heliming
   * @date    2024/5/24-10:47
   * @throws IOException
   */
  public void program() throws IOException {
    // program -> block
    Stmt s = block();
    int begin = s.newlabel();
    int after = s.newlabel();
    s.emitlabel(begin);
    s.gen(begin, after);
    s.emitlabel(after);
  }

  /**
   * 代码块分析，解析块语句，包括变量声明部分和语句部分，并在解析过程中维护符号表环境。
   * @author  heliming
   * @date    2024/5/24-10:46
   * @return
   * @throws IOException
   */
  Stmt block() throws IOException {  // block -> { decls stmts }
    match('{');
    Env savedEnv = top;
    top = new Env(top);
    decls();
    Stmt s = stmts();
    match('}');
    top = savedEnv;
    return s;
  }

  /**
   * 循环解析变量声明语句，将解析得到的标识符和类型信息存储到符号表中，并更新使用的位置。
   * 程序中的声明会被处理为符号表中有关标识符的条目。虽然这里没有显示，声明还可能生成在运行时刻为标识符保留存储空间的指令。
   * @author  heliming
   * @date    2024/5/24-10:59
   * @throws IOException
   */
  void decls() throws IOException {
    while (look.tag == Tag.BASIC) {  // D -> type ID ;
      Type p = type();
      Token tok = look;
      match(Tag.ID);
      match(';');
      Id id = new Id((Word) tok, p, used);
      top.put(tok, id);
      used = used + p.width;
    }
  }

  /**
   * 解析变量的类型，如果是基本类型则直接返回，如果是数组类型则调用 dims(p) 方法进行处理。
   * @author  heliming
   * @date    2024/5/24-11:06
   * @return
   * @throws IOException
   */
  Type type() throws IOException {
    Type p = (Type) look;//期望look.tag == Tag.BASIC
    match(Tag.BASIC);
    if (look.tag != '[') {
      return p; // T -> basic
    } else {
      return dims(p); // 返回数组类型
    }
  }

  /**
   * 解析数组类型，包括数组的维度和每个维度的大小，并返回一个表示数组类型的对象。
   * @author  heliming
   * @date    2024/5/24-11:09
   * @param p 数组类型
   * @return
   * @throws IOException
   */
  Type dims(Type p) throws IOException {
    match('[');
    Token tok = look;
    match(Tag.NUM);
    match(']');
    if (look.tag == '[') {
      p = dims(p);
    }
    return new Array(((Num) tok).value, p);
  }

  /**
   * 解析语句序列，直到遇到右大括号 } 结束，返回一个包含所有解析的语句的序列对象。
   * @author  heliming
   * @date    2024/5/24-11:11
   * @return
   * @throws IOException
   */
  Stmt stmts() throws IOException {
    //如果当前词法单元的标签是右大括号 }，表示语句序列结束，返回空语句 Stmt.Null。
    if (look.tag == '}') {
      return Stmt.Null;
    } else {
      //如果当前词法单元的标签不是右大括号 }，表示还有语句需要解析，调用 stmt() 方法解析当前语句，
      // 并递归调用 stmts() 方法解析下一个语句，然后返回一个新的序列语句对象 Seq，连接当前语句和下一个语句
      return new Seq(stmt(), stmts());
    }
  }

  /**
   * 非终结符号stmt的各个产生式 switch
   * @author  heliming
   * @date    2024/5/24-11:18
   * @return
   * @throws IOException
   */
  Stmt stmt() throws IOException {
    Expr x;
    Stmt s, s1, s2;
    Stmt savedStmt; // 用于为break语句保存外层的循环语句

    switch (look.tag) {
      case ';':
        move();
        return Stmt.Null;
      case Tag.IF:
        match(Tag.IF);
        match('(');
        x = bool();
        match(')');
        s1 = stmt();
        if (look.tag != Tag.ELSE) {
          return new If(x, s1);
        }
        match(Tag.ELSE);
        s2 = stmt();
        return new Else(x, s1, s2);
      case Tag.WHILE:
        While whilenode = new While();
        //保存当前的外层语句节点，这在处理嵌套循环时会很有用。
        savedStmt = Stmt.Enclosing;
        //将当前的 while 循环语句节点设置为外层语句节点，以便在内部语句中可以访问到外层的 while 循环节点。
        Stmt.Enclosing = whilenode;
        match(Tag.WHILE);
        match('(');
        x = bool();
        match(')');
        s1 = stmt();
        whilenode.init(x, s1);
        Stmt.Enclosing = savedStmt; // 重置Stmt.Enclosing 恢复之前保存的外层语句节点，保持语法树的正确结构。
        return whilenode;
      case Tag.DO:
        Do donode = new Do();
        savedStmt = Stmt.Enclosing;
        Stmt.Enclosing = donode;
        match(Tag.DO);
        s1 = stmt();
        match(Tag.WHILE);
        match('(');
        x = bool();
        match(')');
        match(';');
        donode.init(s1, x);
        Stmt.Enclosing = savedStmt; // 重置Stmt.Enclosing
        return donode;
      case Tag.BREAK:
        match(Tag.BREAK);
        match(';');
        return new Break();
      case '{':
        return block();
      default:
        return assign();
    }
  }

  /**
   * 赋值语句的语法解析
   * @author  heliming
   * @date    2024/5/24-13:30
   * @return
   * @throws IOException
   */
  Stmt assign() throws IOException {
    Stmt stmt;
    Token t = look;
    match(Tag.ID);
    Id id = top.get(t);
    if (id == null) error(t.toString() + " undeclared");
    if (look.tag == '=') { // S -> id = E
      move();
      stmt = new Set(id, bool());
    } else { // S -> L = E
      Access x = offset(id);
      match('=');
      stmt = new SetElem(x, bool());
    }
    match(';');
    return stmt;
  }

  /**
   * 解析布尔表达式
   * @author  heliming
   * @date    2024/5/24-13:01
   * @return
   * @throws IOException
   */
  Expr bool() throws IOException {
    Expr x = join();
    while (look.tag == Tag.OR) {
      Token tok = look;
      move();
      x = new Or(tok, x, join());
    }
    return x;
  }

  /**
   * 解析包含逻辑与操作 (AND) 的布尔表达式
   * @author  heliming
   * @date    2024/5/24-13:02
   * @return
   * @throws IOException
   */
  Expr join() throws IOException {
    Expr x = equality();
    while (look.tag == Tag.AND) {
      Token tok = look;
      move();
      x = new And(tok, x, equality());
    }
    return x;
  }

  /**
   * 解析相等性表达式，例如 == 和 != 运算符
   * @author  heliming
   * @date    2024/5/24-13:09
   * @return
   * @throws IOException
   */
  Expr equality() throws IOException {
    Expr x = rel();
    while (look.tag == Tag.EQ || look.tag == Tag.NE) {
      Token tok = look;
      move();
      x = new Rel(tok, x, rel());
    }
    return x;
  }

  /**
   * 解析包含关系运算符 (<, <=, >=, >) 的表达式
   * @author  heliming
   * @date    2024/5/24-13:03
   * @return
   * @throws IOException
   */
  Expr rel() throws IOException {
    Expr x = expr();
    switch (look.tag) {
      case '<':
      case Tag.LE:
      case Tag.GE:
      case '>':
        Token tok = look;
        move();
        return new Rel(tok, x, expr());
      default:
        return x;
    }
  }

  /**
   * 解析了包含加法和减法运算符的表达式
   * @author  heliming
   * @date    2024/5/24-13:05
   * @return
   * @throws IOException
   */
  Expr expr() throws IOException {
    Expr x = term();
    while (look.tag == '+' || look.tag == '-') {
      Token tok = look;
      move();
      x = new Arith(tok, x, term());
    }
    return x;
  }

  /**
   * 解析包含乘法 (*) 和除法 (/) 运算符的表达式
   * @author  heliming
   * @date    2024/5/24-13:05
   * @return
   * @throws IOException
   */
  Expr term() throws IOException {
    Expr x = unary();
    while (look.tag == '*' || look.tag == '/') {
      Token tok = look;
      move();
      x = new Arith(tok, x, unary());
    }
    return x;
  }

  /**
   * 解析一元运算符的表达式，例如负号（-）和逻辑非运算符（!）
   * @author  heliming
   * @date    2024/5/24-13:06
   * @return
   * @throws IOException
   */
  Expr unary() throws IOException {
    if (look.tag == '-') {
      move();
      return new Unary(Word.minus, unary());
    } else if (look.tag == '!') {
      Token tok = look;
      move();
      return new Not(tok, unary());
    } else {
      return factor();
    }
  }

  /**
   * 解析基本表达式，如括号内的表达式、常量和标识符
   * @author  heliming
   * @date    2024/5/24-13:08
   * @return
   * @throws IOException
   */
  Expr factor() throws IOException {
    Expr x = null;
    switch (look.tag) {
      case '(':
        move();
        x = bool();
        match(')');
        return x;
      case Tag.NUM:
        x = new Constant(look, Type.Int);
        move();
        return x;
      case Tag.REAL://如果标签是实数，则表示一个浮点常量
        x = new Constant(look, Type.Float);
        move();
        return x;
      case Tag.TRUE:
        x = Constant.True;
        move();
        return x;
      case Tag.FALSE:
        x = Constant.False;
        move();
        return x;
      case Tag.ID://如果标签是标识符，则可能是变量或数组访问。
        String s = look.toString();
        Id id = top.get(look);
        if (id == null) error(look.toString() + " undeclared");
        move();
        if (look.tag != '[') {
          return id;
        } else {
          return offset(id);
        }
      default:
        error("syntax error");
        return x;
    }
  }

  /**
   * 对数组元素的访问，支持多维数组
   * @author  heliming
   * @date    2024/5/24-13:14
   * @param a
   * @return
   * @throws IOException
   */
  Access offset(Id a) throws IOException {  // I -> [E] | [E] I
    Expr i;
    Expr w;
    Expr t1, t2;
    Expr loc; // 继承 id
    Type type = a.type; // 第一个下标 ，I -> [E]
    match('[');
    i = bool();
    match(']');
    type = ((Array) type).of;
    w = new Constant(type.width);
    t1 = new Arith(new Token('*'), i, w);
    loc = t1;

    // 多维下标，I -> [E] I
    while (look.tag == '[') {
      match('[');
      i = bool();
      match(']');
      type = ((Array) type).of;
      w = new Constant(type.width);
      t1 = new Arith(new Token('*'), i, w);
      t2 = new Arith(new Token('+'), loc, t1);
      loc = t2;
    }
    return new Access(a, loc, type);
  }

}

A.9 创建前端

javac lexer/*.java
javac symbols/*.java
javac inter/*.java
javac parser/*.java
javac main/*.java

上面的 javac 命令为每个类创建了.class 文件。要练习使用我们的翻译器，只需要输人java main.Main，后面跟上将要被翻译的源程序，比如文件 test 中的内容:

{
    int i;
    int j;
    float v;
    float x;
    float[100] a;

    while (true) {
    do i = i + 1; while (a[i] < v);
    do j = j - 1; while (a[j] > v);
    if (i >= j) break;
    x = a[i];
    a[i] = a[j];
    a[j] = x;
    }
  }

对于这个输人，这个前端输出:

L1:L3:    i = i + 1
L5:    t1 = i * 8
    t2 = a[t1]
    if t2 < v goto L3
L4:    j = j - 1
L7:    t3 = j * 8
    t4 = a[t3]
    if t4 > v goto L4
L6:    iffalse i >= j goto L8
L9:    goto L2
L8:    t5 = i * 8
    x = a[t5]
L10:    t6 = i * 8
    t7 = j * 8
    t8 = a[t7]
    a[t6] = t8
L11:    t9 = j * 8c
    a[t9] = x
    goto L1
L2:

如果中间代码变为c是这样的

#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>

int main() {
    int i = 0;
    int j = 0;
    float v = 0.0;
    float x = 0.0;
    float a[100] = {0.0};
    // 打印排序前的数组a的内容，额外加的方便看内容
    printf("排序前的数组a为：\n");
    int k; // 在循环外声明变量
    // 填充数组a的随机数
    for ( k = 0; k < 100; k++) {
        a[k] = (float)rand() / RAND_MAX * 100.0; // 生成0到100之间的随机数
    }
    j = 30;                 }
    v = 30;
    for ( k = 0; k < 100; k++) {
        printf("%.2f ", a[k]);
    }
    printf("\n");
    
    while (true) {
        do {
            i = i + 1;
        } while (a[i] < v);

        do {
            j = j - 1;
        } while (a[j] > v);

        if (i >= j) {
            break;
        }

        x = a[i];
        a[i] = a[j];
        a[j] = x;
    }
  // 打印排序后的数组a的内容，额外加的方便看内容
    printf("排序后的数组a为：\n");
    for ( k = 0; k < 100; k++) {
        printf("%.2f ", a[k]);
    }
    printf("\n");
    
    return 0;
}

编译
gcc -o main main.c
运行
./main

编译原理：一个完整的编译器前端
阿明
撰写于 2024年 05月 24 日

一个完整的编译器前端

A.1源语言

A.2 Main

A.3 词法分析器

A.4 符号表和类型

A.5 表达式的中间代码

A.6 布尔表达式的跳转代码

A.7 语句的中间代码

A.8 语法分析器

A.9 创建前端

编译原理：一个完整的编译器前端

一个完整的编译器前端

A.1源语言

A.2 Main

A.3 词法分析器

A.4 符号表和类型

A.5 表达式的中间代码

A.6 布尔表达式的跳转代码

A.7 语句的中间代码

A.8 语法分析器

A.9 创建前端

微信分享二维码

评论区(暂无评论)

啊哦，评论功能已关闭～

编译原理：一个完整的编译器前端 阿明 撰写于 2024年 05月 24 日

一个完整的编译器前端

A.1源语言

A.2 Main

A.3 词法分析器

A.4 符号表和类型

A.5 表达式的中间代码

A.6 布尔表达式的跳转代码

A.7 语句的中间代码

A.8 语法分析器

A.9 创建前端

编译原理：一个完整的编译器前端

一个完整的编译器前端

A.1源语言

A.2 Main

A.3 词法分析器

A.4 符号表和类型

A.5 表达式的中间代码

A.6 布尔表达式的跳转代码

A.7 语句的中间代码

A.8 语法分析器

A.9 创建前端

微信分享二维码

评论区(暂无评论)

啊哦，评论功能已关闭～

编译原理：一个完整的编译器前端
阿明
撰写于 2024年 05月 24 日