课程回顾
Swarthmore学院2016年开设的编译系统课程,共有10次大作业。本随笔记录了相关的课堂笔记以及第4次大作业的实现。
A-Normal Form
在80年代,函数式语言编译器主要使用Continuation-passing style(CPS)作为中间代码表示形式。 1992年Sabry和Felleisen引入了另一种和CPS一样简单的表示形式:A-normal form(ANF),并且证明了:使用ANF作为中间代码表示形式能够和使用CPS一样容易生成汇编代码并进行代码优化。
编程作业
本次大作业是为Boa编程语言实现一个小型编译器,其编译过程为:boa源代码 -> expr(user-facing) -> aexpr(compiler-facing) -> x86_32汇编。
具体语法
boa源代码
<expr> :=
  | let <bindings> in <expr>
  | if <expr>: <expr> else: <expr>
  | <binop-expr>
<binop-expr> :=
  | <number>
  | <identifier>
  | add1(<expr>)
  | sub1(<expr>)
  | <expr> + <expr>
  | <expr> - <expr>
  | <expr> * <expr>
  | ( <expr> )
<bindings> :=
  | <identifier> = <expr>
  | <identifier> = <expr>, <bindings>
抽象语法
expr(user-facing)
(** Unary primitive operators. *)
type prim1 =
  | Add1
  | Sub1

(** Binary primitive operators. *)
type prim2 =
  | Plus
  | Minus
  | Times

(** User-facing abstract syntax: operands may be arbitrarily nested
    expressions. *)
type expr =
  | ELet of (string * expr) list * expr (* (name, bound expression) list, then the body *)
  | EPrim1 of prim1 * expr
  | EPrim2 of prim2 * expr * expr
  | EIf of expr * expr * expr (* condition, then-branch, else-branch *)
  | ENumber of int
  | EId of string
aexpr(compiler-facing)
(** Immediate expressions: a numeric literal or an identifier. *)
type immexpr =
  | ImmNumber of int
  | ImmId of string

(** Compound expressions: every operand position holds an immediate; only the
    two branches of [CIf] hold full [aexpr] values. *)
type cexpr =
  | CPrim1 of prim1 * immexpr
  | CPrim2 of prim2 * immexpr * immexpr
  | CIf of immexpr * aexpr * aexpr (* condition, then-branch, else-branch *)
  | CImmExpr of immexpr

(** Compiler-facing expressions: a chain of single-name bindings of compound
    expressions, terminated by a compound expression. *)
and aexpr =
  | ALet of string * cexpr * aexpr (* name, bound expression, body *)
  | ACExpr of cexpr
程序例子
| boa | expr | aexpr | Answer |
| --- | --- | --- | --- |
| sub1(5) | EPrim1(Sub1, ENumber(5)) | - | 4 |
| if 5 - 5: 6 else: 8 | EIf(EPrim2(Minus, ENumber(5), ENumber(5)), ENumber(6), ENumber(8)) | - | 8 |
| let x = 10, y = 9 in if (x - y) * 2: x else: y | ELet([("x", ENumber(10)), ("y", ENumber(9))], EIf(EPrim2(Times, EPrim2(Minus, EId("x"), EId("y")), ENumber(2)), EId("x"), EId("y"))) | - | 10 |
| (5 + 4) + (3 + 2) | - | - | 14 |
| let x = (let y=10 in y) in x | - | - | 10 |

expr -> aexpr
(** [anf_k e k] converts the user-facing expression [e] into an [aexpr],
    written in continuation-passing style.

    [k] receives the immediate that stands for the value of [e] and builds the
    remainder of the program around it. Results of primitive operations and
    conditionals are bound with [ALet] to a name obtained from [gen_temp]
    (defined elsewhere; presumably it returns a fresh name built from the
    given prefix -- confirm against its definition). *)
let rec anf_k (e : expr) (k : immexpr -> aexpr) : aexpr =
  match e with
  (* Leaves are already immediates: pass them straight to the continuation. *)
  | ENumber n -> k (ImmNumber n)
  | EId x -> k (ImmId x)
  | EPrim1 (op, arg) ->
    let result = gen_temp "unary" in
    anf_k arg (fun arg_imm ->
        ALet (result, CPrim1 (op, arg_imm), k (ImmId result)))
  | EPrim2 (op, lhs, rhs) ->
    let result = gen_temp "binary" in
    (* Left operand is normalised first, the right one inside its
       continuation. *)
    anf_k lhs (fun lhs_imm ->
        anf_k rhs (fun rhs_imm ->
            ALet (result, CPrim2 (op, lhs_imm, rhs_imm), k (ImmId result))))
  | EIf (cond, thn, els) ->
    let result = gen_temp "if" in
    (* Each branch is normalised on its own and simply ends in its value. *)
    let finish imm = ACExpr (CImmExpr imm) in
    (* The ALet below is deliberately left as a single constructor
       application: its sub-calls have side effects through [gen_temp], and
       introducing intermediate bindings could reorder them. *)
    anf_k cond (fun cond_imm ->
        ALet
          ( result,
            CIf (cond_imm, anf_k thn finish, anf_k els finish),
            k (ImmId result) ))
  | ELet (binds, body) ->
    (* Bind each name in order; once the list is exhausted, normalise the
       body with the caller's continuation. *)
    let rec bind_all = function
      | [] -> anf_k body k
      | (id, bound) :: rest ->
        anf_k bound (fun bound_imm ->
            ALet (id, CImmExpr bound_imm, bind_all rest))
    in
    bind_all binds
aexpr -> x86_32
(** [acompile_imm_arg i _ env] turns the immediate [i] into an instruction
    operand.

    A number becomes [Const]. An identifier is looked up in [env] with [find]
    (defined elsewhere) and becomes [RegOffset (-4 * slot, ESP)] for the slot
    index recorded in [env]. The second argument is ignored.

    @raise Failure if the identifier has no entry in [env]. *)
let acompile_imm_arg (i : immexpr) _ (env : (string * int) list) : arg =
  match i with
  | ImmNumber n -> Const n
  | ImmId name ->
    (match find env name with
     | None ->
       failwith
         (sprintf
            "An identifier is unbound (there is no surrounding let binding for %s)"
            name)
     | Some slot -> RegOffset (-4 * slot, ESP))

(** [acompile_imm i si env] emits the single move that loads the immediate [i]
    into EAX. *)
let acompile_imm (i : immexpr) (si : int) (env : (string * int) list)
  : instruction list =
  let operand = acompile_imm_arg i si env in
  [ IMov (Reg EAX, operand) ]

(** [acompile_step s si env] emits instructions for the compound expression
    [s]. Every case loads its first operand into EAX and then operates on EAX
    in place.

    [si] is the stack index passed down to nested expressions; [env] maps
    identifiers to the stack index they were stored at. *)
let rec acompile_step (s : cexpr) (si : int) (env : (string * int) list)
  : instruction list =
  match s with
  | CImmExpr imm -> acompile_imm imm si env
  | CPrim1 (op, operand) ->
    let load = acompile_imm operand si env in
    (* Both unary operators are emitted as an add of +1 or -1. *)
    let delta =
      match op with
      | Add1 -> 1
      | Sub1 -> -1
    in
    load @ [ IAdd (Reg EAX, Const delta) ]
  | CPrim2 (op, lhs, rhs) ->
    (* The left operand is resolved before the right one, so an unbound left
       identifier is the one reported when both are unbound. *)
    let load_lhs = acompile_imm lhs si env in
    let rhs_arg = acompile_imm_arg rhs si env in
    let combine =
      match op with
      | Plus -> IAdd (Reg EAX, rhs_arg)
      | Minus -> ISub (Reg EAX, rhs_arg)
      | Times -> IMul (Reg EAX, rhs_arg)
    in
    load_lhs @ [ combine ]
  | CIf (cond, thn, els) ->
    let else_label = gen_temp "else" in
    let end_label = gen_temp "endif" in
    (* The condition is compared against 0; equality jumps to the else
       branch, so 0 selects [els] and anything else selects [thn]. The append
       chain is kept in its original shape so the sub-compilations run in the
       same order as before. *)
    acompile_imm cond si env
    @ [ ICmp (Reg EAX, Const 0); IJe else_label ]
    @ acompile_expr thn si env
    @ [ IJmp end_label; ILabel else_label ]
    @ acompile_expr els si env
    @ [ ILabel end_label ]

(** [acompile_expr e si env] emits instructions for the ANF expression [e].

    For [ALet], the bound expression is compiled with stack index [si + 1],
    EAX is then moved to [RegOffset (-4 * si, ESP)], and the body is compiled
    with the name mapped to [si] and stack index [si + 1]. *)
and acompile_expr (e : aexpr) (si : int) (env : (string * int) list)
  : instruction list =
  match e with
  | ACExpr c -> acompile_step c si env
  | ALet (id, bound, rest) ->
    let next = si + 1 in
    let compute = acompile_step bound next env in
    let continue_ = acompile_expr rest next ((id, si) :: env) in
    let store = IMov (RegOffset (-4 * si, ESP), Reg EAX) in
    compute @ (store :: continue_)