一、安装pip、vim和git
sudo apt-get -y install python3-pip vim git
二、配置pip源
mkdir -p ~/.pip && vim ~/.pip/pip.conf
pip.conf
[global]
index-url = https://mirrors.cloud.tencent.com/pypi/simple
[install]
trusted-host = mirrors.cloud.tencent.com
三、安装显卡驱动和pytorch
# rocm5.4.2需要Ubuntu内核5.15+
$ cd ~ && uname -srmv
-
Ubuntu 22.04 + rocm 5.4.2
$ wget https://repo.radeon.com/amdgpu-install/5.4.2/ubuntu/jammy/amdgpu-install_5.4.50402-1_all.deb
$ sudo apt-get install ./amdgpu-install_5.4.50402-1_all.deb
$ sudo apt-get update
$ amdgpu-install -y --usecase=graphics,rocm
# 设置运行权限
$ ls -l /dev/dri/render*
$ sudo usermod -a -G render $LOGNAME
$ sudo usermod -a -G video $LOGNAME
$ pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm5.4.2
# 重启
$ reboot
-
Ubuntu 20.04 + rocm 5.4.2
$ wget https://repo.radeon.com/amdgpu-install/5.4.2/ubuntu/focal/amdgpu-install_5.4.50402-1_all.deb
$ sudo apt-get install ./amdgpu-install_5.4.50402-1_all.deb
$ sudo apt-get update
$ amdgpu-install -y --usecase=graphics,rocm
# 设置运行权限
$ ls -l /dev/dri/render*
$ sudo usermod -a -G render $LOGNAME
$ sudo usermod -a -G video $LOGNAME
$ pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm5.4.2
# 重启
$ reboot
-
Ubuntu 20.04 + rocm 5.2
$ wget https://repo.radeon.com/amdgpu-install/22.40.3/ubuntu/focal/amdgpu-install_5.4.50403-1_all.deb
$ sudo apt-get install ./amdgpu-install_5.4.50403-1_all.deb
$ sudo apt-get update
$ amdgpu-install -y --usecase=graphics
$ sudo apt-get purge amdgpu-install
$ wget https://repo.radeon.com/amdgpu-install/22.20/ubuntu/focal/amdgpu-install_22.20.50200-1_all.deb
$ sudo apt-get install ./amdgpu-install_22.20.50200-1_all.deb
$ sudo apt-get update
$ amdgpu-install -y --usecase=rocm
# 设置运行权限
$ ls -l /dev/dri/render*
$ sudo usermod -a -G render $LOGNAME
$ sudo usermod -a -G video $LOGNAME
$ pip3 install torch==1.13.1+rocm5.2 torchvision==0.14.1+rocm5.2 torchaudio==0.13.1 --extra-index-url https://download.pytorch.org/whl/rocm5.2
# 重启
$ reboot
四、检测显卡驱动是否安装成功
# 显示GPU活动信息
$ rocm-smi
# 检查内核模式驱动程序的安装
$ dkms status
# 如果下面两个命令都列出了GPU,则认为安装成功
# (路径中的版本号需与实际安装的rocm版本一致,例如安装rocm 5.2时请改为对应的目录)
$ /opt/rocm-5.4.2/bin/rocminfo
# 以及
$ /opt/rocm-5.4.2/opencl/bin/clinfo
五、检测pytorch是否安装成功
当输出为True时,说明pytorch已安装成功并且能够通过rocm正常使用显卡
$ vim ~/check.py
$ python3 ~/check.py
check.py
import torch
print(torch.cuda.is_available())
六、错误解决
-
"hipErrorNoBinaryForGpu: Unable to find code object for all current devices!" 已放弃 (核心已转储)
check.py
import os
# 建议在导入torch之前设置该变量,确保运行时初始化时能够读取到
os.environ["HSA_OVERRIDE_GFX_VERSION"] = "10.3.0"
import torch
print(torch.cuda.is_available())
或者将该变量加入环境变量(无需修改脚本,长期生效)
# 对全局用户生效
$ sudo vim /etc/profile
$ source /etc/profile
# 对当前用户生效
$ vim ~/.bash_profile
$ source ~/.bash_profile
在/etc/profile或~/.bash_profile末尾加入
export HSA_OVERRIDE_GFX_VERSION=10.3.0