源码
/* Printable names of the clock sources. CLK_Init() indexes this table with
 * its clock_source argument; the table holds exactly three entries. */
static char * clock_source_name[3]={
"INNER","OUTER","XXSIM"
};
/*
 * Print the name of the selected TBU clock source.
 *
 * As shown here the function only prints; it performs no other setup.
 * clock_source is used directly as an index into clock_source_name[] with no
 * range check, so it must be 0, 1 or 2.
 * NOTE(review): rfiu_clock_t is declared outside this snippet - confirm its
 * values cannot fall outside that range.
 */
void CLK_Init(rfiu_clock_t clock_source){
printf("\nTBU CLOCK SOURCE : %s\n\n",clock_source_name[clock_source]);
}
禁止relaxation
4a: 00000717 auipc a4,0x0
4e: 00070713 mv a4,a4
52: fec46783 lwu a5,-20(s0)
56: 078e c.slli a5,0x3
58: 97ba c.add a5,a4
5a: 639c c.ld a5,0(a5)
5c: 85be c.mv a1,a5
5e: 00000517 auipc a0,0x0
62: 00050513 mv a0,a0
66: 00000097 auipc ra,0x0
6a: 000080e7 jalr ra # 66 <.L4+0x1c>
0000000040863350 G __global_pointer$
0000000040861b60 d clock_source_name
4083c714: 00025717 auipc a4,0x25
4083c718: 44c70713 addi a4,a4,1100 # 40861b60 <clock_source_name>
4083c71c: fec46783 lwu a5,-20(s0)
4083c720: 078e c.slli a5,0x3
4083c722: 97ba c.add a5,a4
4083c724: 639c c.ld a5,0(a5)
4083c726: 85be c.mv a1,a5
4083c728: 0001a517 auipc a0,0x1a
4083c72c: d6850513 addi a0,a0,-664 # 40856490 <__func__.0+0xd468>
4083c730: f81c60ef jal ra,408036b0 <printf>
使能relaxation
4a: 00000717 auipc a4,0x0
4e: 00070713 mv a4,a4
52: fec46783 lwu a5,-20(s0)
56: 078e c.slli a5,0x3
58: 97ba c.add a5,a4
5a: 639c c.ld a5,0(a5)
5c: 85be c.mv a1,a5
5e: 00000517 auipc a0,0x0
62: 00050513 mv a0,a0
66: 00000097 auipc ra,0x0
6a: 000080e7 jalr ra # 66 <.L4+0x1c>
00000000 40860ff0 D __global_pointer$
00000000 408556c0 d clock_source_name
0x40860ff0 - 47408 (0xb930) == 0x408556c0,即 addigp 的立即数就是 clock_source_name 相对 __global_pointer$ 的偏移
链接后
4083bb44: ed09170b addigp a4,-47408 # 408556c0 <clock_source_name>
4083bb48: fec46783 lwu a5,-20(s0)
4083bb4c: 078e c.slli a5,0x3
4083bb4e: 97ba c.add a5,a4
4083bb50: 639c c.ld a5,0(a5)
4083bb52: 85be c.mv a1,a5
4083bb54: 8107150b addigp a0,-53232 # 40854000 <__func__.0+0xd338>
4083bb58: a65c70ef jal ra,408035bc <printf>
"\nTBU CLOCK SOURCE : %s\n\n"
Sections:
Idx Name Size VMA LMA File off Algn
0 .init 000000f8 0000000040800000 0000000040800000 00001000 2**0
CONTENTS, ALLOC, LOAD, READONLY, CODE
1 .text 00044ba0 00000000408000f8 00000000408000f8 000010f8 2**2
CONTENTS, ALLOC, LOAD, READONLY, CODE
2 .rodata 0000fcb0 0000000040844c98 0000000040844c98 00045c98 2**3
CONTENTS, ALLOC, LOAD, READONLY, DATA
40854000:540a
40854002:5542
40854004:4320
40854006:4f4c
40854008:53204b43
4085400c:4352554f
40854010:2045
40854012:203a
40854014:7325
40854016:0a0a
两种方式对比
本例中有 2 处地址计算改用了 gp:每处由 auipc + addi 两条指令变为一条 addigp,共少用 2 条指令
平均每使用 1 次 gp 寻址,就比 pc 相对寻址少 1 条指令,从而达到性能优化(同时减小代码体积)的效果
使用gp , 使之前的寻址(pc-relative) 变为 (gp-relative)
因为gp的值是固定的,不需要再次读取,直接计算目标地址
但是pc的值是变化的,需要花一个指令来读取pc,然后计算目标地址
所以要少一条指令
把 pc-relative 寻址替换为 gp-relative 寻址,是 linker relaxation(链接器松弛优化)的一种
在riscv 中,我们用gp寻址全局变量,用fp来寻址局部变量
程序员如何设置gp寄存器
链接脚本设置
链接脚本中不显式PROVIDE __global_pointer$ , 链接器会默认提供该符号
代码设置
_start:
# initialize global pointer
# The load of __global_pointer$ is wrapped in .option push / norelax / pop so
# that this one instruction sequence is not relaxed; the disassembly that
# follows shows it kept as a pc-relative auipc + addi pair.
.option push
.option norelax
la gp, __global_pointer$
.option pop
000000004080000c <_start>:
4080000c: 00061197 auipc gp,0x61
40800010: fe418193 addi gp,gp,-28 # 40860ff0
_start:
# initialize global pointer
# NOTE: no .option norelax here. The disassembly that follows shows the la
# relaxed to "addigp gp,0", i.e. gp is derived from gp itself before this
# code has set it, so this variant does not actually load the address.
la gp, __global_pointer$
000000004080000c <_start>:
4080000c: 0000118b addigp gp,0 # 40860ff0
CFLAGS 传入, -Wl,-R,--gp=<address>
怎么使能和禁止relaxation
默认开启
使用gcc链接,用如下参数禁止
-Wl,--no-relax : 参数被传递给 链接器,影响链接时行为,即在链接时禁用所有的优化放松,即使汇编中已经使用了优化放松
-mno-relax :参数被传递给 编译器,影响编译时行为,即在编译生成汇编代码时,不要使用放松优化