import requests
from bs4 import BeautifulSoup
import xlwt
import sys
def isConnected(url="http://www.baidu.com", timeout=2):
    """Return True if an HTTP GET to *url* succeeds within *timeout* seconds.

    Quick network-reachability probe run before scraping. Defaults keep the
    original no-argument call working.
    """
    try:
        # Response body is irrelevant; only reachability matters.
        requests.get(url, timeout=timeout)
    except requests.exceptions.RequestException:
        # DNS failure, refused connection, timeout, etc. -> treat as offline.
        # (Narrowed from a bare `except:` that also swallowed KeyboardInterrupt.)
        return False
    return True
# Abort early when the network is unreachable.
if not isConnected():
    print("网络连接失败")
    # BUG FIX: was sys.exit(0), which reports success to the shell even
    # though the script failed; a failure must exit non-zero.
    sys.exit(1)
# Fetch the game homepage and collect every <a> link inside the news list.
url = "http://mxd.sdo.com/web6/home/index.asp"
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36'}
html_code = requests.get(url, headers=headers)
soup = BeautifulSoup(html_code.text, "html.parser")
# BUG FIX: attrs must be a dict. {"class","news-list"} is a SET literal,
# which BeautifulSoup only matches by accident; the intended filter is
# class="news-list".
soup_1 = soup.find("div", attrs={"class": "news-list"})
soup_2 = soup_1.find_all("a")
# Print every news entry as "index - title - absolute URL".
kw = "更多"  # the "more" navigation link; it is not a news item
i = 0
for ele in soup_2:
    if kw in ele:
        continue  # skip the navigation link
    i = i + 1
    href = ele.get("href")
    if ".." in href:
        # Relative links like "../news/x.asp" -> absolute site URL.
        print(i, "-", ele.get_text(), "-", "http://mxd.sdo.com/web6" + href.replace("..", ""))
    else:
        print(i, "-", ele.get_text(), "-", href)
# Write the collected news items to an Excel sheet: ID | 新闻名称 | 地址.
head = ["ID", "新闻名称", "地址"]
workbook = xlwt.Workbook(encoding="utf-8")
sheet_1 = workbook.add_sheet("sheet1")
sheet_1.col(1).width = 16000  # title column, widened for long headlines
sheet_1.col(2).width = 18000  # URL column

for col, title in enumerate(head):
    sheet_1.write(0, col, title)

# BUG FIX: the original built the title list excluding the "更多" link but
# the link list INCLUDING it, so titles and URLs could misalign. Collect
# aligned (title, url) pairs in a single pass instead, and stop relying on
# `kw` leaking out of an earlier loop / on exactly one excluded link.
rows = []
for ele_2 in soup_2:
    if "更多" in ele_2:
        continue  # navigation link, not a news item
    href = ele_2.get("href")
    if ".." in href:
        # Relative links like "../news/x.asp" -> absolute site URL.
        href = "http://mxd.sdo.com/web6" + href.replace("..", "")
    rows.append((ele_2.get_text(), href))

for row, (title, link) in enumerate(rows, start=1):
    sheet_1.write(row, 0, row)    # 1-based ID
    sheet_1.write(row, 1, title)
    sheet_1.write(row, 2, link)

# BUG FIX: xlwt emits the legacy BIFF (.xls) format; saving it with an
# .xlsx extension produces a file Excel refuses to open. Match the format.
workbook.save("test.xls")
# 写出 excel 截图 (Excel output screenshot):