Lua源码解析之三:code

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/Lunar_lty/article/details/77100210

1. 前言


何为CODE?在Proto这一结构体中,有一个字段code:

/*
** Function Prototypes
*/
typedef struct Proto {
  CommonHeader;
  lu_byte numparams;  /* number of fixed parameters */
  lu_byte is_vararg;  /* 2: declared vararg; 1: uses vararg */
  lu_byte maxstacksize;  /* number of registers needed by this function */
  int sizeupvalues;  /* size of 'upvalues' */
  int sizek;  /* size of 'k' */
  int sizecode;
  int sizelineinfo;
  int sizep;  /* size of 'p' */
  int sizelocvars;
  int linedefined;  /* debug information  */
  int lastlinedefined;  /* debug information  */
  TValue *k;  /* constants used by the function */
  Instruction *code;  /* opcodes */
  struct Proto **p;  /* functions defined inside the function */
  int *lineinfo;  /* map from opcodes to source lines (debug information) */
  LocVar *locvars;  /* information about local variables (debug information) */
  Upvaldesc *upvalues;  /* upvalue information */
  struct LClosure *cache;  /* last-created closure with this prototype */
  TString  *source;  /* used for debug information */
  GCObject *gclist;
} Proto;

用于保存函数运行时的指令。Lua在解析函数的过程中,会将一条条语句逐个‘编译’成指令,这些指令就存放在Proto->code这个成员里。除了code成员,还有几个重要的成员变量:
sizecode: 本函数的指令个数
numparams: 定参个数
is_vararg: 是否支持变参:1 表示使用了变参作为最后一个参数
maxstacksize: 本函数最多使用了多少个寄存器
k: 常量表
sizek: 常量表大小
p: 子函数表
sizep: 子函数大小

在解析整个函数结束后,会生成一个常量表,用于保存函数体内的常量字符串和数字,指令放在code里面,函数体内的子函数放在p数组里。

在上一篇中,介绍了函数的解析,包括词法和语法分析(语法分析只是讲了个大概: ) ),本篇将从解析语句开始,洞悉Lua源代码如何一边解析,一边生成code。


2. 解析语句(statement)

2.0 instructor的内部表达

在“Lua源码解析之一”中,有介绍到,lua的指令(instructor),lua指令由一个32位的unsigned int组成,其中:
6位用于表示OPCODE,即操作指令
8位用于表示操作数A,9位用于表示操作数B,9位用于表示操作数C
为了方面进行位操作,lopcodes.h定义了一系列的宏和辅助函数,非常方便使用,假如你想生成一条指令:其命令码为opcode,操作码分别为A,B,C,可以这样写:
luaK_codeABC(fs, opcode, A, B, C);
宏CREATE_ABC(o, a, b, c)可以直接生成这个int32的整形数值。
在lcode.c文件中,大部分函数都对特定的指令进行了编码。
有关这些指令的编码,后面会有详解。

2.1 if statement

先来看看if-elseif-then,这样的语句是如何处理的:

static void ifstat (LexState *ls, int line) {
  /* ifstat -> IF cond THEN block {ELSEIF cond THEN block} [ELSE block] END */
  FuncState *fs = ls->fs;
  int escapelist = NO_JUMP;  /* exit list for finished parts */
  test_then_block(ls, &escapelist);  /* IF cond THEN block */
  while (ls->t.token == TK_ELSEIF)
    test_then_block(ls, &escapelist);  /* ELSEIF cond THEN block */
  if (testnext(ls, TK_ELSE))
    block(ls);  /* 'else' part */
  check_match(ls, TK_END, TK_IF, line);
  luaK_patchtohere(fs, escapelist);  /* patch escape list to 'if' end */
}

先处理if-then语句,然后再检测,如果是elseif的化,接着处理elseif-then语句,直到遇到else-end语句。
接着查看test_then_block代码

/*
 * 解析 if-then 语句
 */
static void test_then_block (LexState *ls, int *escapelist) {
  /* test_then_block -> [IF | ELSEIF] cond THEN block */
  BlockCnt bl;
  FuncState *fs = ls->fs;
  expdesc v;
  int jf;  /* instruction to skip 'then' code (if condition is false) */
  luaX_next(ls);  /* skip IF or ELSEIF */
  expr(ls, &v);  /* read condition */
  checknext(ls, TK_THEN);
  if (ls->t.token == TK_GOTO || ls->t.token == TK_BREAK) {
    luaK_goiffalse(ls->fs, &v);  /* will jump to label if condition is true */
    enterblock(fs, &bl, 0);  /* must enter block before 'goto' */
    gotostat(ls, v.t);  /* handle goto/break */
    skipnoopstat(ls);  /* skip other no-op statements */
    if (block_follow(ls, 0)) {  /* 'goto' is the entire block? */
      leaveblock(fs);
      return;  /* and that is it */
    }
    else  /* must skip over 'then' part if condition is false */
      jf = luaK_jump(fs);
  }
  else {  /* regular case (not goto/break) */
    luaK_goiftrue(ls->fs, &v);  /* skip over block if condition is false */
    enterblock(fs, &bl, 0);
    /* 设定if判定为false时需要执行的JMP指令(所对应的pc)*/
    jf = v.f;
  }
  statlist(ls);  /* 'then' part */
  leaveblock(fs);
  if (ls->t.token == TK_ELSE ||
      ls->t.token == TK_ELSEIF)  /* followed by 'else'/'elseif'? */
    /* 如果有else或者elseif则需要insert JMP指令,该JMP指令用于跳出接下来的语句 */
    luaK_concat(fs, escapelist, luaK_jump(fs));  /* must jump over it */
  luaK_patchtohere(fs, jf);
}

先跳过token-if,接着读取条件控制表达式expr,将结果保存到v,仔细看看expr:

static void expr (LexState *ls, expdesc *v) {
  subexpr(ls, v, 0);
}

它直接调用subexpr:

/*
** subexpr -> (simpleexp | unop subexpr) { binop subexpr }
** where 'binop' is any binary operator with a priority higher than 'limit'
*/
static BinOpr subexpr (LexState *ls, expdesc *v, int limit) {
  BinOpr op;
  UnOpr uop;
  enterlevel(ls);
  uop = getunopr(ls->t.token);
  if (uop != OPR_NOUNOPR) {
    int line = ls->linenumber;
    luaX_next(ls);
    subexpr(ls, v, UNARY_PRIORITY);
    luaK_prefix(ls->fs, uop, v, line);
  }
  else simpleexp(ls, v);
  /* expand while operators have priorities higher than 'limit' */
  op = getbinopr(ls->t.token);
  while (op != OPR_NOBINOPR && priority[op].left > limit) {
    expdesc v2;
    BinOpr nextop;
    int line = ls->linenumber;
    luaX_next(ls);
    luaK_infix(ls->fs, op, v);
    /* read sub-expression with higher priority */
    nextop = subexpr(ls, &v2, priority[op].right);
    luaK_posfix(ls->fs, op, v, &v2, line);
    op = nextop;
  }
  leavelevel(ls);
  return op;  /* return first untreated operator */
}

查看函数头里面的推导式:subexpr -> (simpleexp | unop subexpr) { binop subexpr },如果前面读到的是一个一元操作符,先利用unop subexpr进行推导,与之对应的代码是:

  uop = getunopr(ls->t.token);
  if (uop != OPR_NOUNOPR) {
    int line = ls->linenumber;
    luaX_next(ls);
    subexpr(ls, v, UNARY_PRIORITY);
    luaK_prefix(ls->fs, uop, v, line);
  }
  else simpleexp(ls, v);

获取到unop之后接着递归调用subexpr,无论如何,递归最终还是得调用simpleexp来终止:

static void simpleexp (LexState *ls, expdesc *v) {
  /* simpleexp -> FLT | INT | STRING | NIL | TRUE | FALSE | ... |
                  constructor | FUNCTION body | suffixedexp */
  switch (ls->t.token) {
    case TK_FLT: {
      init_exp(v, VKFLT, 0);
      v->u.nval = ls->t.seminfo.r;
      break;
    }
    case TK_INT: {
      init_exp(v, VKINT, 0);
      v->u.ival = ls->t.seminfo.i;
      break;
    }
    case TK_STRING: {
      codestring(ls, v, ls->t.seminfo.ts);
      break;
    }
    case TK_NIL: {
      init_exp(v, VNIL, 0);
      break;
    }
    case TK_TRUE: {
      init_exp(v, VTRUE, 0);
      break;
    }
    case TK_FALSE: {
      init_exp(v, VFALSE, 0);
      break;
    }
    case TK_DOTS: {  /* vararg */
      FuncState *fs = ls->fs;
      check_condition(ls, fs->f->is_vararg,
                      "cannot use '...' outside a vararg function");
      fs->f->is_vararg = 1;  /* function actually uses vararg */
      init_exp(v, VVARARG, luaK_codeABC(fs, OP_VARARG, 0, 1, 0));
      break;
    }
    case '{': {  /* constructor */
      constructor(ls, v);
      return;
    }
    case TK_FUNCTION: {
      luaX_next(ls);
      body(ls, v, 0, ls->linenumber);
      return;
    }
    default: {
      suffixedexp(ls, v);
      return;
    }
  }
  luaX_next(ls);
}

simpleexpr中对常量的处理都很简单,直接赋值到v即可。
需注意的是,如果token=TK_STRING,则需要调用luaK_stringK将此string添加到常量表中,并返回其下标。
如果是TK_DOT,表示读到的token为”…”,设定fs->f->is_vararg =1,并设定v的类型为VVARARG。
如果是’{‘,则说明该表达式是一个table的constructor。

2.1.1 table constructor

本小节单独分析constructor是如何解析和编码的:

static void constructor (LexState *ls, expdesc *t) {
  /* constructor -> '{' [ field { sep field } [sep] ] '}'
     sep -> ',' | ';' */
  FuncState *fs = ls->fs;
  int line = ls->linenumber;
  int pc = luaK_codeABC(fs, OP_NEWTABLE, 0, 0, 0);
  struct ConsControl cc;
  cc.na = cc.nh = cc.tostore = 0;
  cc.t = t;
  init_exp(t, VRELOCABLE, pc);
  init_exp(&cc.v, VVOID, 0);  /* no value (yet) */
  luaK_exp2nextreg(ls->fs, t);  /* fix it at stack top */
  checknext(ls, '{');
  do {
    lua_assert(cc.v.k == VVOID || cc.tostore > 0);
    if (ls->t.token == '}') break;
    closelistfield(fs, &cc);
    field(ls, &cc);
  } while (testnext(ls, ',') || testnext(ls, ';'));
  check_match(ls, '}', '{', line);
  lastlistfield(fs, &cc);
  SETARG_B(fs->f->code[pc], luaO_int2fb(cc.na)); /* set initial array size */
  SETARG_C(fs->f->code[pc], luaO_int2fb(cc.nh));  /* set initial table size */
}

它先调用luaK_codeABC生成一条OP_NEWTABLE指令,接着初始化table的数组大小和hash表大小为0,调用init_exp设定t的类型为VRELOCABLE,value=pc,顾名思义,VRELOCABLE,表示该表达式的存放在哪,暂时还没确定下来,需要“重定向”(RELOCABLE)。
为了给t安排一个“好住处”,遇到luaK_exp2nextreg:

void luaK_exp2nextreg (FuncState *fs, expdesc *e) {
  luaK_dischargevars(fs, e);
  freeexp(fs, e);
  luaK_reserveregs(fs, 1);
  exp2reg(fs, e, fs->freereg - 1);
}

freeexp释放e所占用的寄存器,luaK_reserveregs只是简单的将freereg++,重点看下exp2reg函数:

/*
 * 
 */
static void exp2reg (FuncState *fs, expdesc *e, int reg) {
  discharge2reg(fs, e, reg);
  if (e->k == VJMP)
    luaK_concat(fs, &e->t, e->u.info);  /* put this jump in 't' list */
  if (hasjumps(e)) {
    int final;  /* position after whole expression */
    int p_f = NO_JUMP;  /* position of an eventual LOAD false */
    int p_t = NO_JUMP;  /* position of an eventual LOAD true */
    if (need_value(fs, e->t) || need_value(fs, e->f)) {
      int fj = (e->k == VJMP) ? NO_JUMP : luaK_jump(fs);
      p_f = code_label(fs, reg, 0, 1);
      p_t = code_label(fs, reg, 1, 0);
      luaK_patchtohere(fs, fj);
    }
    final = luaK_getlabel(fs);
    patchlistaux(fs, e->f, final, reg, p_f);
    patchlistaux(fs, e->t, final, reg, p_t);
  }
  e->f = e->t = NO_JUMP;
  e->u.info = reg;
  e->k = VNONRELOC;
}

/*
 * 将表达式e'安置'到寄存器中
 */
static void discharge2reg (FuncState *fs, expdesc *e, int reg) {
  luaK_dischargevars(fs, e);
  switch (e->k) {
    case VNIL: {
      luaK_nil(fs, reg, 1);
      break;
    }
    case VFALSE: case VTRUE: {
      luaK_codeABC(fs, OP_LOADBOOL, reg, e->k == VTRUE, 0);
      break;
    }
    case VK: {
      luaK_codek(fs, reg, e->u.info);
      break;
    }
    case VKFLT: {
      luaK_codek(fs, reg, luaK_numberK(fs, e->u.nval));
      break;
    }
    case VKINT: {
      luaK_codek(fs, reg, luaK_intK(fs, e->u.ival));
      break;
    }
    case VRELOCABLE: {
      Instruction *pc = &getcode(fs, e);
      SETARG_A(*pc, reg);
      break;
    }
    case VNONRELOC: {
      if (reg != e->u.info)
        luaK_codeABC(fs, OP_MOVE, reg, e->u.info, 0);
      break;
    }
    default: {
      lua_assert(e->k == VVOID || e->k == VJMP);
      return;  /* nothing to do... */
    }
  }
  e->u.info = reg;
  e->k = VNONRELOC;
}

先调用discharge2reg将表达式e安置在寄存器reg中,然后判定e是否为跳转指令,由于OPCODE=OP_NEWTABLE,且e->f=t=NO_JUMP,所以不存在跳转,进行相应的赋值后函数返回:

 e->u.info = reg; e->k = VNONRELOC;

接着checknext(ls, ‘{‘):

  do {
    lua_assert(cc.v.k == VVOID || cc.tostore > 0);
    if (ls->t.token == '}') break;
    closelistfield(fs, &cc);
    field(ls, &cc);
  } while (testnext(ls, ',') || testnext(ls, ';'));

在do-while语句中,如果读到’}’,说明constructor结束了,break
closelistfield在cc.v.k=VVOID的时候什么也没做,接下来运行到field:

static void field (LexState *ls, struct ConsControl *cc) {
  /* field -> listfield | recfield */
  switch(ls->t.token) {
    case TK_NAME: {  /* may be 'listfield' or 'recfield' */
      if (luaX_lookahead(ls) != '=')  /* expression? */
        listfield(ls, cc);
      else
        recfield(ls, cc);
      break;
    }
    case '[': {
      recfield(ls, cc);
      break;
    }
    default: {
      listfield(ls, cc);
      break;
    }
  }
}

field有三种表示:
1. {key=value}形式,例如 t = {a=1,b=2,…}
2. {[“key”]=value}形式,例如t = {[“a”]=1, [2] = 2}
3. {val1,val2,…} 形式,例如 t = {1,2,3,4}

其中case1和case2比较类似,它们最终都调用recfield来实现:

static void recfield (LexState *ls, struct ConsControl *cc) {
  /* recfield -> (NAME | '['exp1']') = exp1 */
  FuncState *fs = ls->fs;
  int reg = ls->fs->freereg;
  expdesc key, val;
  int rkkey;
  if (ls->t.token == TK_NAME) {
    checklimit(fs, cc->nh, MAX_INT, "items in a constructor");
    checkname(ls, &key);
  }
  else  /* ls->t.token == '[' */
    yindex(ls, &key);
  cc->nh++;
  checknext(ls, '=');
  rkkey = luaK_exp2RK(fs, &key);
  expr(ls, &val);
  luaK_codeABC(fs, OP_SETTABLE, cc->t->u.info, rkkey, luaK_exp2RK(fs, &val));
  fs->freereg = reg;  /* free registers */
}

先调用checkname或者yindex,为表达式key赋值,然后将表达式安置到寄存器或常量表中,skip ‘=’,接着解析expr表达式,保存结果到val。
最后调用luaK_codeABC编码,一条OP_SETTABLE指令就这样产生了,其中cc->t->u.info表示table,rkkey表示key, luaK_exp2RK(fs,&val)表示value
逻辑指令为:OP_SETTABLE table, key, value。

再看case3,调用listfield:

static void listfield (LexState *ls, struct ConsControl *cc) {
  /* listfield -> exp */
  expr(ls, &cc->v);
  checklimit(ls->fs, cc->na, MAX_INT, "items in a constructor");
  cc->na++;
  cc->tostore++;
}

static void closelistfield (FuncState *fs, struct ConsControl *cc) {
  if (cc->v.k == VVOID) return;  /* there is no list item */
  luaK_exp2nextreg(fs, &cc->v);
  cc->v.k = VVOID;
  if (cc->tostore == LFIELDS_PER_FLUSH) {
    luaK_setlist(fs, cc->t->u.info, cc->na, cc->tostore);  /* flush */
    cc->tostore = 0;  /* no more items pending */
  }
}

listfield直接将解析的表达式保存在cc->v中,等到do-while一下次循环,调用closelistfield,将cc->v的值再保存到寄存器中。

最后调用到lastlistfield :

static void lastlistfield (FuncState *fs, struct ConsControl *cc) {
  if (cc->tostore == 0) return;
  if (hasmultret(cc->v.k)) {
    luaK_setmultret(fs, &cc->v);
    luaK_setlist(fs, cc->t->u.info, cc->na, LUA_MULTRET);
    cc->na--;  /* do not count last expression (unknown number of elements) */
  }
  else {
    if (cc->v.k != VVOID)
      luaK_exp2nextreg(fs, &cc->v);
    luaK_setlist(fs, cc->t->u.info, cc->na, cc->tostore);
  }
}

查看 if (hasmultret(cc->v.k)) 语句的else分支,发现,接着又调用了luaK_exp2nextreg,这等于变相的调用了一次closelistfield,目的是为了对最后一个list元素进行处理,接着调用luaK_setlist进行编码。

回头再看constructor函数实现的最后两行:

  SETARG_B(fs->f->code[pc], luaO_int2fb(cc.na)); /* set initial array size */
  SETARG_C(fs->f->code[pc], luaO_int2fb(cc.nh));  /* set initial table size */

上面这段代码用来设定OP_NEWTABLE指令的两个参数,一个表示数组大小,一个表示hash表大小。
到这里,table constructor讲解完毕。

2.1.2 suffixedexp

请看该函数的定义:

/*
 * 解析带有后缀的表达式,分以下几种形式:
 * 1. exp.a.b...    使用域运算符
 * 2. exp[a][b]...  使用table键值引用
 * 3. exp:a(...)    使用成员函数调用
 * 4. exp(...)      直接函数调用
 */
static void suffixedexp (LexState *ls, expdesc *v) {
  /* suffixedexp ->
       primaryexp { '.' NAME | '[' exp ']' | ':' NAME funcargs | funcargs } */
  FuncState *fs = ls->fs;
  int line = ls->linenumber;
  primaryexp(ls, v);
  for (;;) {
    switch (ls->t.token) {
      case '.': {  /* fieldsel */
        fieldsel(ls, v);
        break;
      }
      case '[': {  /* '[' exp1 ']' */
        expdesc key;
        luaK_exp2anyregup(fs, v);
        yindex(ls, &key);
        luaK_indexed(fs, v, &key);
        break;
      }
      case ':': {  /* ':' NAME funcargs */
        expdesc key;
        luaX_next(ls);
        checkname(ls, &key);
        luaK_self(fs, v, &key);
        funcargs(ls, v, line);
        break;
      }
      case '(': case TK_STRING: case '{': {  /* funcargs */
        luaK_exp2nextreg(fs, v);
        funcargs(ls, v, line);
        break;
      }
      default: return;
    }
  }
}

如果是’.’符号,调用fieldsel:

static void fieldsel (LexState *ls, expdesc *v) {
  /* fieldsel -> ['.' | ':'] NAME */
  FuncState *fs = ls->fs;
  expdesc key;
  luaK_exp2anyregup(fs, v);
  luaX_next(ls);  /* skip the dot or colon */
  checkname(ls, &key);
  luaK_indexed(fs, v, &key);
}

先调用luaK_exp2anyregup将其转到寄存器中,然后读取’.’后面的key,编码成indexed。
luaK_indexed定义如下:

void luaK_indexed (FuncState *fs, expdesc *t, expdesc *k) {
  lua_assert(!hasjumps(t));
  t->u.ind.t = t->u.info;
  t->u.ind.idx = luaK_exp2RK(fs, k);
  t->u.ind.vt = (t->k == VUPVAL) ? VUPVAL
                                 : check_exp(vkisinreg(t->k), VLOCAL);
  t->k = VINDEXED;
}

可以看到,t->u.ind里面的字段是专门为VINDEXED使用的,包括table, key, type(VUPVAL, VLOCAL)。
注意,最后将t->k设置为VINDEXED了。

读取’:’,之后后来接着读name,然后编码luaK_self:

/*
* R(A+1) = R(B)
* R(A)   = R(B)[Rk(C)]
*/
void luaK_self (FuncState *fs, expdesc *e, expdesc *key) {
  int ereg;
  luaK_exp2anyreg(fs, e);
  ereg = e->u.info;  /* register where 'e' was placed */
  freeexp(fs, e);
  e->u.info = fs->freereg;  /* base register for op_self */
  e->k = VNONRELOC;
  luaK_reserveregs(fs, 2);  /* function and 'self' produced by op_self */
  luaK_codeABC(fs, OP_SELF, e->u.info, ereg, luaK_exp2RK(fs, key));
  freeexp(fs, key);
}

函数注释表示的就是OP_SELF完成的功能。
注意:luaK_reserveregs(fs,2)保留两个寄存器,一个是给R(A)用,一个是给R(A+1)用。

无论如何,后面都会调用到funcargs函数:

static void funcargs (LexState *ls, expdesc *f, int line) {
  FuncState *fs = ls->fs;
  expdesc args;
  int base, nparams;
  switch (ls->t.token) {
    case '(': {  /* funcargs -> '(' [ explist ] ')' */
      luaX_next(ls);
      if (ls->t.token == ')')  /* arg list is empty? */
        args.k = VVOID;
      else {
        explist(ls, &args);
        luaK_setmultret(fs, &args);
      }
      check_match(ls, ')', '(', line);
      break;
    }
    case '{': {  /* funcargs -> constructor */
      constructor(ls, &args);
      break;
    }
    case TK_STRING: {  /* funcargs -> STRING */
      codestring(ls, &args, ls->t.seminfo.ts);
      luaX_next(ls);  /* must use 'seminfo' before 'next' */
      break;
    }
    default: {
      luaX_syntaxerror(ls, "function arguments expected");
    }
  }
  lua_assert(f->k == VNONRELOC);
  base = f->u.info;  /* base register for call */
  if (hasmultret(args.k))
    nparams = LUA_MULTRET;  /* open call */
  else {
    if (args.k != VVOID)
      luaK_exp2nextreg(fs, &args);  /* close last argument */
    nparams = fs->freereg - (base+1);
  }

  init_exp(f, VCALL, luaK_codeABC(fs, OP_CALL, base, nparams+1, 2));
  luaK_fixline(fs, line);
  /*  */
  fs->freereg = base+1;  /* call remove function and arguments and leaves
                            (unless changed) one result */
}

里面会读到表达式列表,可以看到表达式列表中的每一个表达式都被放在寄存器中,。。。最后编码成
OP_CALL,表明它是一个函数调用。

2.1.3 test_then_block

看一下这个函数test_then_block:

/*
 * 解析 if-then 语句
 */
static void test_then_block (LexState *ls, int *escapelist) {
  /* test_then_block -> [IF | ELSEIF] cond THEN block */
  BlockCnt bl;
  FuncState *fs = ls->fs;
  expdesc v;
  int jf;  /* instruction to skip 'then' code (if condition is false) */
  luaX_next(ls);  /* skip IF or ELSEIF */
  expr(ls, &v);  /* read condition */
  checknext(ls, TK_THEN);
  if (ls->t.token == TK_GOTO || ls->t.token == TK_BREAK) {
    luaK_goiffalse(ls->fs, &v);  /* will jump to label if condition is true */
    enterblock(fs, &bl, 0);  /* must enter block before 'goto' */
    gotostat(ls, v.t);  /* handle goto/break */
    skipnoopstat(ls);  /* skip other no-op statements */
    if (block_follow(ls, 0)) {  /* 'goto' is the entire block? */
      leaveblock(fs);
      return;  /* and that is it */
    }
    else  /* must skip over 'then' part if condition is false */
      jf = luaK_jump(fs);
  }
  else {  /* regular case (not goto/break) */
    luaK_goiftrue(ls->fs, &v);  /* skip over block if condition is false */
    enterblock(fs, &bl, 0);
    /* 设定if判定为false时需要执行的JMP指令(所对应的pc)*/
    jf = v.f;
  }
  statlist(ls);  /* 'then' part */
  leaveblock(fs);
  if (ls->t.token == TK_ELSE ||
      ls->t.token == TK_ELSEIF)  /* followed by 'else'/'elseif'? */
    /* 如果有else或者elseif则需要insert JMP指令,该JMP指令用于跳出接下来的语句 */
    luaK_concat(fs, escapelist, luaK_jump(fs));  /* must jump over it */
  luaK_patchtohere(fs, jf);
}

解析完if语句里面的表达式之后,假定下一条语句不是goto,则进入regular case,接着调用luaK_goiftrue:

/*
 * 对表达式e进行判定,若为true则pc++;如果为fales则需要执行JMP
 */
void luaK_goiftrue (FuncState *fs, expdesc *e) {
  int pc;  /* pc of last jump */
  luaK_dischargevars(fs, e);
  switch (e->k) {
    case VJMP: {
      invertjump(fs, e);
      pc = e->u.info;
      break;
    }
    case VK: case VKFLT: case VKINT: case VTRUE: {
      pc = NO_JUMP;  /* always true; do nothing */
      break;
    }
    default: {
      pc = jumponcond(fs, e, 0);
      break;
    }
  }
  luaK_concat(fs, &e->f, pc);  /* insert last jump in 'f' list */
  luaK_patchtohere(fs, e->t);
  e->t = NO_JUMP;
}

由于if的判定表达式一般为VJMP,则调用invertjump(这个实际上跟lvm.c里面解析OPTEST命令相关),标明为false的话则需要跳转,然后还将e->f和pc concat起来。如果if的判定式必定为TRUE的话,则pc=NO_JUMP,表示不可能跳转了。

调用luaK_goiftrue返回,接着解析if语句里面的statelist,解析完之后如果紧跟着else或者else-if,则马上生产一条JMP指令,用于escape整条if-{else-if|else}-end 语句。

一直循环下去直至遇到else-end语句。

回头再来看ifstat函数:

static void ifstat (LexState *ls, int line) {
  /* ifstat -> IF cond THEN block {ELSEIF cond THEN block} [ELSE block] END */
  FuncState *fs = ls->fs;
  int escapelist = NO_JUMP;  /* exit list for finished parts */
  test_then_block(ls, &escapelist);  /* IF cond THEN block */
  while (ls->t.token == TK_ELSEIF)
    test_then_block(ls, &escapelist);  /* ELSEIF cond THEN block */
  if (testnext(ls, TK_ELSE))
    block(ls);  /* 'else' part */
  check_match(ls, TK_END, TK_IF, line);
  luaK_patchtohere(fs, escapelist);  /* patch escape list to 'if' end */
}

调用luaK_patchtohere函数:将escapelist这条指向JMP指令“hold住”,然后让这条JMP指令可以直接跳转到下一条指令:

/*
 *  设置fs->jpc到list
 *  以便在对下一条语句进行编码时,将pc[list]的JMP目标设置为下一条语句
 */
void luaK_patchtohere (FuncState *fs, int list) {
  luaK_getlabel(fs);
  luaK_concat(fs, &fs->jpc, list);
}

可以看见,它是直接通过修改jpc来实现的,然后在下一条指令编码的时候必定调用该函数:

static int luaK_code (FuncState *fs, Instruction i) {
  Proto *f = fs->f;
  dischargejpc(fs);  /* 'pc' will change */
  /* put new instruction in code array */
  luaM_growvector(fs->ls->L, f->code, fs->pc, f->sizecode, Instruction,
                  MAX_INT, "opcodes");
  f->code[fs->pc] = i;
  /* save corresponding line information */
  luaM_growvector(fs->ls->L, f->lineinfo, fs->pc, f->sizelineinfo, int,
                  MAX_INT, "opcodes");
  f->lineinfo[fs->pc] = fs->ls->lastline;
  return fs->pc++;
}

static void patchlistaux (FuncState *fs, int list, int vtarget, int reg,
                          int dtarget) {
  while (list != NO_JUMP) {
    int next = getjump(fs, list);
    if (patchtestreg(fs, list, reg))
      fixjump(fs, list, vtarget);
    else
      fixjump(fs, list, dtarget);  /* jump to default target */
    list = next;
  }
}

/*
 * 令jpc JMP 到当前 fs->pc
 * 注意: jpc由luaK_patchtohere函数来修改
 */
static void dischargejpc (FuncState *fs) {
  patchlistaux(fs, fs->jpc, fs->pc, NO_REG, fs->pc);
  fs->jpc = NO_JUMP;
}

调用dischargejpc,就是使jpc可以直接跳转到当前编码的这条指令。

假定if语句是这样子的:
if a > b then
max = a
else
max = b
end

max = max*2

0: LT 0,b,a
1: JMP 4
2: MOV max, a
3: JMP 5
4: MOV max b
5: MUL max, 2

其中1:JMP 4 表示jump to false,而3:JMP 5表示jump to escape。

未完待续… …

猜你喜欢

转载自blog.csdn.net/Lunar_lty/article/details/77100210