实现8086虚拟机（四）——mov 和 jmp 指令解码

文章目录

- mov 指令解码
- jmp 指令解码

这篇文章举例来讲讲 mov 指令和 jmp 指令解码函数的实现，其他的指令解码函数都与这些类似。

mov 指令解码

以 mov 指令中的一类：寄存器/内存到/从寄存器，来详细说明解码函数的实现。

机器指令格式如下：
在这里插入图片描述
各字段的含义如下：

w 1 bit  w=0表示数据宽度是字节，w=1表示数据宽度是字
d 1 bit   d=0表示reg是源操作数，d=1表示reg是目的操作数
reg 3 bits
	REG W=0 W=1
	000 AL AX
	001 CL CX
	010 DL DX
	011 BL BX
	100 AH SP
	101 CH BP
	110 DH SI
	111 BH DI
mod 2 bits
	00 Memory Mode, no displacement follows（R/M=110 时除外，此时后跟 16 位直接地址）
	01 Memory Mode, 8-bit displacement follows
	10 Memory Mode, 16-bit displacement follows
	11 Register Mode (no displacement)
rm 3 bits
	MOD=11              EFFECTIVE ADDRESS CALCULATION
	R/M w=0 w=1   R/M  MOD=00        MOD=01          MOD=10
	000 AL AX     000 (BX)+(SI)     (BX)+(SI)+D8    (BX)+(SI)+D16
	001 CL CX     001 (BX)+(DI)     (BX)+(DI)+D8    (BX)+(DI)+D16
	010 DL DX     010 (BP)+(SI)     (BP)+(SI)+D8    (BP)+(SI)+D16
	011 BL BX     011 (BP)+(DI)     (BP)+(DI)+D8    (BP)+(DI)+D16
	100 AH SP     100 (SI)          (SI)+D8         (SI)+D16
	101 CH BP     101 (DI)          (DI)+D8         (DI)+D16
	110 DH SI     110 DIRECTADDRESS (BP)+D8         (BP)+D16
	111 BH DI     111 (BX)          (BX)+D8         (BX)+D16

解码函数 decodeMovRegOrMemoryToFromReg 的目的就是将它转换为如下形式的中间指令格式：

指令类型，指令详细类型，[源操作数]，[目的操作数]

decodeMovRegOrMemoryToFromReg 函数首先检测输入的机器指令的长度：

// decodeMovRegOrMemoryToFromReg decodes the "register/memory to/from register"
// form of mov, whose machine encoding is
// 0b100010dw, mod reg r/m, (DISP-LO), (DISP-HI).
//
// It returns nil when instructions is shorter than the two mandatory bytes
// plus the displacement length reported by lenDisplacement for the second
// byte.
func decodeMovRegOrMemoryToFromReg(instructions []byte) []byte {
    
    

	/* 0b100010dw, mod reg r/m, (DISP-LO), (DISP-HI) */
	decodegth := len(instructions)
	// The opcode byte and the mod/reg/rm byte are always present.
	if decodegth < 2 {
    
    
		return nil
	}

	// The second byte determines how many displacement bytes must follow.
	dispLen := lenDisplacement(instructions[1])
	if decodegth < 2+dispLen {
    
    
		return nil
	}

	// The rest of the body is elided in this excerpt.
	....
}

由上文机器指令格式可知，这个指令至少有 2 字节长。如果长度达到 2 字节，再调用 lenDisplacement 获取偏移量的长度。如果指令长度达不到指令格式的要求，说明不是一条完整的指令，那就返回 nil。

lenDisplacement 的实现如下：

// lenDisplacement reports how many displacement bytes (0, 1 or 2) follow a
// second instruction byte laid out as "mod xxx r/m".
//
// The mod field (bits 7-6) decides the length on its own, with one exception:
// mod=00 combined with r/m=110 is a direct address (e.g. "mov bx, [1]") and
// carries two displacement bytes.
func lenDisplacement(secondByte byte) int {
	addressingMode := secondByte >> 6
	operand := secondByte & 0b111

	switch addressingMode {
	case 0b00:
		if operand == 0b110 {
			return 2
		}
		return 0
	case 0b01:
		return 1
	case 0b10:
		return 2
	default: // 0b11: register mode, no displacement
		return 0
	}
}

就是根据 mod 和 rm 字段的含义返回偏移量的长度。

然后就是根据 d、w、mod、rm、reg 字段的含义，确定指令详细类型和操作数，将中间形式的指令格式返回：

	// Every decoded instruction starts with the instruction type; the detailed
	// type and the operands (source first, then destination) are appended below.
	decodedInstructions := []byte{
    
    InstructionMov}
	// First byte: bit 1 is d (direction), bit 0 is w (operand width).
	d := (instructions[0] & 0b10) >> 1
	w := instructions[0] & 0b1
	// Second byte: mod in bits 7-6, reg in bits 5-3, r/m in bits 2-0.
	mod := (instructions[1] & 0b11000000) >> 6
	reg := (instructions[1] & 0b111000) >> 3
	rm := instructions[1] & 0b111
	switch mod {
    
    
	case 0b11: // Register mode (no displacement): both operands are registers.
		if w == 0 {
    
    
			decodedInstructions = append(decodedInstructions, MovReg8ToReg8)
		} else {
    
    
			decodedInstructions = append(decodedInstructions, MovReg16ToReg16)
		}
		if d == 0 {
    
     // reg is the source operand, r/m the destination
			decodedInstructions = append(decodedInstructions, reg)
			decodedInstructions = append(decodedInstructions, rm)
		} else {
    
    
			// reg is the destination operand, r/m the source
			decodedInstructions = append(decodedInstructions, rm)
			decodedInstructions = append(decodedInstructions, reg)
		}
	default:
		// Memory modes: r/m (plus any displacement) names a memory operand,
		// which decodeMemoryOperand turns into its intermediate form.
		if d == 0 {
    
    
			// Register to memory.
			if w == 0 {
    
    
				decodedInstructions = append(decodedInstructions, MovReg8ToMemory)
			} else {
    
    
				decodedInstructions = append(decodedInstructions, MovReg16ToMemory)
			}
			decodedInstructions = append(decodedInstructions, reg)
			decodedInstructions = append(decodedInstructions,
				decodeMemoryOperand(mod, rm, instructions[2:])...)

		} else {
    
    
			// Memory to register.
			if w == 0 {
    
    
				decodedInstructions = append(decodedInstructions, MovMemoryToReg8)
			} else {
    
    
				decodedInstructions = append(decodedInstructions, MovMemoryToReg16)
			}
			decodedInstructions = append(decodedInstructions,
				decodeMemoryOperand(mod, rm, instructions[2:])...)
			decodedInstructions = append(decodedInstructions, reg)
		}
	}
	return decodedInstructions

比如，当 mod 字段为 0b11 时，表示两个操作数都是寄存器，如果 d 为 0，那么 reg 字段就是源操作数，rm 字段就是目的操作数。如果 w 为1，那么操作数的宽度就是16位。这时候生成的中间指令格式为：

InstructionMov，MovReg16ToReg16 ，reg ， rm

如果 reg 的值是 0（AX），rm 的值是 1（CX），由于此时 reg 是源操作数、rm 是目的操作数，这条指令对应的汇编指令就是：

mov cx, ax

就是这么简单。

再看下 mov 立即数到内存/寄存器的解码函数 decodeMovImmediateToRegOrMemory 的实现：

// decodeMovImmediateToRegOrMemory decodes "mov immediate to register/memory",
// whose machine encoding is
// 1100011w, mod 000 r/m, [disp-lo], [disp-hi], data, [data if w=1],
// into the intermediate form
//
//	InstructionMov, <detail type>, <immediate byte(s)>, <destination operand>
//
// The destination operand is the r/m register number when mod is 0b11, and
// the output of decodeMemoryOperand otherwise.
//
// It returns nil when instructions is too short to hold the whole
// instruction. Bytes after the end of the instruction are ignored.
func decodeMovImmediateToRegOrMemory(instructions []byte) []byte {
	if len(instructions) < 2 {
		return nil
	}

	w := instructions[0] & 0b1
	mod := instructions[1] >> 6
	rm := instructions[1] & 0b111

	dispLen := lenDisplacement(instructions[1])
	dataLen := 1
	if w == 1 {
		dataLen = 2
	}

	// The immediate follows the displacement directly. Both are located from
	// the start of the instruction, so the result does not depend on how many
	// extra bytes the caller passes after it.
	dataStart := 2 + dispLen
	dataEnd := dataStart + dataLen
	if len(instructions) < dataEnd {
		return nil
	}
	disp := instructions[2:dataStart]
	data := instructions[dataStart:dataEnd]

	registerDest := mod == 0b11
	var detail byte
	switch {
	case w == 0 && registerDest:
		detail = MovImmediateToReg8
	case w == 0:
		detail = MovImmediate8ToMemory
	case registerDest:
		detail = MovImmediateToReg16
	default:
		detail = MovImmediate16ToMemory
	}

	decodedInstructions := make([]byte, 0, 2+dataLen+1)
	decodedInstructions = append(decodedInstructions, InstructionMov, detail)
	// A 16-bit immediate keeps its encoded order: low byte, then high byte.
	decodedInstructions = append(decodedInstructions, data...)
	if registerDest {
		return append(decodedInstructions, rm)
	}
	return append(decodedInstructions, decodeMemoryOperand(mod, rm, disp)...)
}

其他的都类似。

jmp 指令解码

jmp 指令包含无条件转移和条件转移。
decode_jmp.go 中先把所有的指令详细类型定义出来：

// Detailed instruction types for jmp. The values are assigned by iota, so
// they depend on the order of the declarations below.
const (
	// Unconditional jumps.
	JmpNotShort           uint8 = iota // 16-bit IP displacement
	JmpShort                           // 8-bit IP displacement
	JmpDirectIntersegment              // 16-bit CS and 16-bit IP
	JmpReg16                           // the IP value is held in a register
	JmpIndirectWithinsegment
	JmpIndirectIntersegment
	// Conditional jumps; the opcode noted on each line is the first byte
	// that decodeJmpConditional maps to it.
	JmpJo   // opcode 0x70
	Jmpjno  // opcode 0x71; NOTE(review): lowercase "j" breaks the JmpJxx naming pattern
	JmpJb   // opcode 0x72
	JmpJnb  // opcode 0x73
	JmpJe   // opcode 0x74
	JmpJne  // opcode 0x75
	JmpJbe  // opcode 0x76
	JmpJnbe // opcode 0x77
	JmpJs   // opcode 0x78
	JmpJns  // opcode 0x79
	JmpJp   // opcode 0x7A
	JmpJnp  // opcode 0x7B
	JmpJl   // opcode 0x7C
	JmpJnl  // opcode 0x7D
	JmpJle  // opcode 0x7E
	JmpJnle // opcode 0x7F
	JmpJcxz // opcode 0xE3
)

初始化函数，注册所有的 jmp 指令与它的解码函数：

// init registers every jmp opcode together with the function that decodes it.
func init() {
	// Unconditional jumps. Opcode 0xFF is registered through
	// AddDecodeInstruction2 with an additional 3-bit value (0b100 / 0b101).
	AddDecodeInstruction(0xE9, decodeJmpDirectWithinsegment)
	AddDecodeInstruction(0xEA, decodeJmpDirectIntersegment)
	AddDecodeInstruction(0xEB, decodeJmpDirectWithinsegmentShort)
	AddDecodeInstruction2(0xFF, 0b100, decodeJmpIndirectWithinsegment)
	AddDecodeInstruction2(0xFF, 0b101, decodeJmpIndirectIntersegment)

	// Conditional jumps: opcodes 0x70 through 0x7F share one decoder.
	for opcode := byte(0x70); opcode <= 0x7F; opcode++ {
		AddDecodeInstruction(opcode, decodeJmpConditional)
	}

	// jcxz uses the same decoder.
	AddDecodeInstruction(0xE3, decodeJmpConditional)
}

以段内间接转移为例，它的机器指令格式如下：
在这里插入图片描述
对应的解码函数 decodeJmpIndirectWithinsegment 代码如下：

// decodeJmpIndirectWithinsegment decodes the indirect within-segment jmp,
// whose machine encoding is 11111111, mod 100 r/m, (DISP-LO), (DISP-HI).
//
// The result is "InstructionJmp, JmpReg16, r/m" when mod is 0b11, and
// "InstructionJmp, JmpIndirectWithinsegment" followed by the output of
// decodeMemoryOperand otherwise. It returns nil when instructions is shorter
// than the two mandatory bytes plus the displacement length.
func decodeJmpIndirectWithinsegment(instructions []byte) []byte {
	if len(instructions) < 2 {
		return nil
	}

	modRM := instructions[1]
	if len(instructions) < 2+lenDisplacement(modRM) {
		return nil
	}

	mode, operand := modRM>>6, modRM&0b111
	if mode != 0b11 {
		// The jump target is read from memory.
		memOperand := decodeMemoryOperand(mode, operand, instructions[2:])
		return append([]byte{InstructionJmp, JmpIndirectWithinsegment}, memOperand...)
	}

	// Register mode: r/m is the number of the register holding the target.
	return []byte{InstructionJmp, JmpReg16, operand}
}

如果 mod 是 0b11，则返回的中间形式机器指令为：

InstructionJmp，JmpReg16，rm

条件转移的解码函数就更简单，因为条件转移的机器指令固定 2 个字节：
在这里插入图片描述
只需根据第一个字节确定详细指令类型即可，它的实现如下：

// decodeJmpConditional decodes a two-byte conditional jump (opcode, IP-INC8)
// into "InstructionJmp, <detail type>, IP-INC8".
//
// The detail type is chosen from the first byte alone. It returns nil when
// fewer than two bytes are supplied.
func decodeJmpConditional(instructions []byte) []byte {
	if len(instructions) < 2 {
		return nil
	}

	// kind keeps its zero value for any opcode not listed below.
	var kind uint8
	switch instructions[0] {
	case 0x70:
		kind = JmpJo
	case 0x71:
		kind = Jmpjno
	case 0x72:
		kind = JmpJb
	case 0x73:
		kind = JmpJnb
	case 0x74:
		kind = JmpJe
	case 0x75:
		kind = JmpJne
	case 0x76:
		kind = JmpJbe
	case 0x77:
		kind = JmpJnbe
	case 0x78:
		kind = JmpJs
	case 0x79:
		kind = JmpJns
	case 0x7A:
		kind = JmpJp
	case 0x7B:
		kind = JmpJnp
	case 0x7C:
		kind = JmpJl
	case 0x7D:
		kind = JmpJnl
	case 0x7E:
		kind = JmpJle
	case 0x7F:
		kind = JmpJnle
	case 0xE3:
		kind = JmpJcxz
	}

	return []byte{InstructionJmp, kind, instructions[1]}
}

其他指令的解码函数实现都类似。

后续文章讲解 EU 如何实现执行 mov，jmp 以及一些算术运算指令。