使用docker搭建scrapy爬虫节点

1. 在centos7上安装docker

#安装docker
yum install docker -y
#安装docker阿里云加速器,下面的PRIVATE_ID请换成自己的docker的仓库id
sudo mkdir -p /etc/docker
sudo tee /etc/docker/daemon.json <<-'EOF'
{
  "registry-mirrors": ["https://PRIVATE_ID.mirror.aliyuncs.com"]
}
EOF
sudo systemctl daemon-reload
#启动docker
systemctl start docker
#设置系统启动加载docker服务
chkconfig docker on

2. 下载最新centos镜像并运行容器

#下载centos:latest
docker pull docker.io/centos:latest
#查看image id = 84c2xxxx
docker images
#创建共享目录
mkdir -p /root/docker/shared
#运行容器,预留ssh 22端口映射为10022;预留mariaDB 3306映射为13306
docker run -p 10022:22 -p 13306:3306 --privileged --name centos -v /root/docker/shared:/mnt/shared -it docker.io/centos:latest /sbin/init

3. 在镜像中安装scrapy爬虫环境

#安装必要工具
yum install vim openssl-devel bzip2-devel expat-devel gdbm-devel readline-devel sqlite-devel wget -y
yum install -y gcc gcc-c++ autoconf automake libtool make
#下载python3
cd
wget https://www.python.org/ftp/python/3.6.4/Python-3.6.4.tgz
#解压并安装python3
tar zxvf Python-3.6.4.tgz
cd Python-3.6.4
./configure --prefix=/usr/local/python3 --enable-optimizations
make && make install
ln -s /usr/local/python3/bin/python3.6 /usr/bin/python3
ln -s /usr/local/python3/bin/pip3 /usr/bin/pip3y

4. 备份python2.7并修正yum工具中python指向

#备份python2.7环境
mv /usr/bin/python /usr/bin/python.bak
ln -s /usr/bin/python3 /usr/bin/python
mv /usr/bin/pip /usr/bin/pip.bak
ln -s /usr/bin/pip3y /usr/bin/pip
#修改yum中的python环境指向
vim /usr/bin/yum
#将第一行 python改为python2.7
vim /usr/libexec/urlgrabber-ext-down 
#将第一行 python改为python2.7

5. 安装scrapy

#安装scrapy
pip install scrapy
#部署路径
ln -s /usr/local/python3/bin/scrapy /usr/bin/scrapy

6. 打包image并上传阿里云供后续备用

#在docker中退出container
exit
#在操作系统中查看container id
docker ps -a
#查到container id:9023xxx,打包为:scrapy:v1
docker commit 9023 scrapy:v1
#查看image id为8bdc
docker images
#将image更新/保存到阿里云
sudo docker login --username=ACCOUNT registry.cn-hangzhou.aliyuncs.com
sudo docker tag 8bdc registry.cn-hangzhou.aliyuncs.com/norble/scrapy:v1
sudo docker push registry.cn-hangzhou.aliyuncs.com/norble/scrapy:v1

猜你喜欢

转载自blog.csdn.net/xc70203/article/details/79136812