首个博客

课后作业最后一题

制作搜索器

  • [ 1]打印词条内容
  • [ 2]允许用户输入搜索的关键词
  • [ 3]打印目录中第一个标题
  • [ 4]打印第一段内容
  • [ 5]打印第二个标题下的第一段内容
  • [ 6]打印标题、副标题和简介(即词条内容)
  • [ 7]简化代码

第一步

import urllib.request
import re
from bs4 import BeautifulSoup

def main():
    """Download one fixed Baidu Baike page and print a <meta> tag's content.

    The page is parsed with BeautifulSoup and the ``content`` attribute of
    the fourth ``<meta>`` tag in the document is printed.

    Raises:
        IndexError: if the page contains fewer than four ``<meta>`` tags.
        KeyError: if the selected tag has no ``content`` attribute.
    """
    url = "http://baike.baidu.com/view/284853.htm"
    # The context manager closes the HTTP response as soon as the body has
    # been read, instead of leaving the connection open.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")  # Python's built-in HTML parser
    meta_tags = soup.find_all("meta")
    # NOTE(review): index 3 depends on the page layout; presumably this is
    # the description <meta> tag — confirm against the live page.
    print(meta_tags[3]["content"])


# Run main() only when this snippet is executed as a script.
if __name__ == "__main__":
    main()

第二步

import urllib.request
import re
from bs4 import BeautifulSoup

def main():
    """Ask for a keyword, look it up on Baidu Baike and print a <meta> content.

    The keyword is URL-encoded as the ``word`` query parameter of the Baike
    search URL. The resulting page is parsed with BeautifulSoup and the
    ``content`` attribute of the fourth ``<meta>`` tag is printed.

    Raises:
        IndexError: if the page contains fewer than four ``<meta>`` tags.
        KeyError: if the selected tag has no ``content`` attribute.
    """
    # Imported explicitly: only urllib.request is imported at file level, and
    # relying on it to load urllib.parse as a side effect is fragile.
    from urllib.parse import urlencode

    keyword = input("请输入关键词:")
    query = urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % query
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")
    meta_tags = soup.find_all("meta")
    # NOTE(review): index 3 depends on the page layout; presumably this is
    # the description <meta> tag — confirm against the live page.
    print(meta_tags[3]["content"])


# Run main() only when this snippet is executed as a script.
if __name__ == "__main__":
    main()

第三步

import urllib.request
import re
from bs4 import BeautifulSoup

def main():
    """Look up a keyword on Baidu Baike; print a <meta> content and an <h2>.

    After printing the ``content`` attribute of the fourth ``<meta>`` tag and
    a separator line, the third ``<h2>`` tag of the page is printed as-is
    (the tag object itself, markup included).

    Raises:
        IndexError: if the page has fewer than four ``<meta>`` tags or fewer
            than three ``<h2>`` tags.
        KeyError: if the selected ``<meta>`` tag has no ``content`` attribute.
    """
    # Imported explicitly: only urllib.request is imported at file level, and
    # relying on it to load urllib.parse as a side effect is fragile.
    from urllib.parse import urlencode

    keyword = input("请输入关键词:")
    query = urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % query
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")

    meta_tags = soup.find_all("meta")
    # NOTE(review): index 3 depends on the page layout; presumably this is
    # the description <meta> tag — confirm against the live page.
    print(meta_tags[3]["content"])
    print("-->"*30)

    headings = soup.find_all("h2")
    # The task list above calls this "the first heading in the catalogue";
    # index 2 is position-dependent — verify against the live page.
    print(headings[2])


# Run main() only when this snippet is executed as a script.
if __name__ == "__main__":
    main()

第四步

import urllib.request
import re
from bs4 import BeautifulSoup

def main():
    """Look up a keyword on Baidu Baike; print a heading and first paragraph.

    Prints, in order: the ``content`` attribute of the fourth ``<meta>`` tag,
    a separator line, the text of the third ``<h2>`` tag followed by a colon,
    and the text of the first element whose class is ``para``.

    Raises:
        IndexError: if the page has fewer than four ``<meta>`` tags or fewer
            than three ``<h2>`` tags.
        KeyError: if the selected ``<meta>`` tag has no ``content`` attribute.
        AttributeError: if no element with class ``para`` exists
            (``soup.find`` then returns ``None``).
    """
    # Imported explicitly: only urllib.request is imported at file level, and
    # relying on it to load urllib.parse as a side effect is fragile.
    from urllib.parse import urlencode

    keyword = input("请输入关键词:")
    query = urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % query
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")

    meta_tags = soup.find_all("meta")
    # NOTE(review): index 3 depends on the page layout; presumably this is
    # the description <meta> tag — confirm against the live page.
    print(meta_tags[3]["content"])
    print("-->"*30)

    headings = soup.find_all("h2")
    # Index 2 is position-dependent; the task list above treats it as the
    # first catalogue heading — verify against the live page.
    first_heading = headings[2]
    print(first_heading.get_text()+":")
    first_para = soup.find(class_="para")
    print(first_para.get_text())


# Run main() only when this snippet is executed as a script.
if __name__ == "__main__":
    main()

第五步

import urllib.request
import re
from bs4 import BeautifulSoup

def main():
    """Look up a keyword on Baidu Baike; print two headings with paragraphs.

    Prints, in order: the ``content`` attribute of the fourth ``<meta>`` tag,
    a separator line, the third ``<h2>`` text with the first ``para``
    element's text, then the fourth ``<h2>`` text with the fourth ``para``
    element's text.

    Raises:
        IndexError: if the page has fewer than four ``<meta>``, ``<h2>`` or
            ``para`` elements.
        KeyError: if the selected ``<meta>`` tag has no ``content`` attribute.
        AttributeError: if no element with class ``para`` exists
            (``soup.find`` then returns ``None``).
    """
    # Imported explicitly: only urllib.request is imported at file level, and
    # relying on it to load urllib.parse as a side effect is fragile.
    from urllib.parse import urlencode

    keyword = input("请输入关键词:")
    query = urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % query
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")

    meta_tags = soup.find_all("meta")
    # NOTE(review): index 3 depends on the page layout; presumably this is
    # the description <meta> tag — confirm against the live page.
    print(meta_tags[3]["content"])
    print("-->"*30)

    headings = soup.find_all("h2")
    # Indices 2 and 3 are position-dependent; the task list above treats them
    # as the first and second catalogue headings — verify on the live page.
    first_heading = headings[2]
    print(first_heading.get_text()+":")
    first_para = soup.find(class_="para")
    print(first_para.get_text())

    second_heading = headings[3]
    print(second_heading.get_text()+":")
    # Separate name for the list, so a single tag and a result list are not
    # stored in the same variable.
    paras = soup.find_all(class_="para")
    # NOTE(review): index 3 is assumed to be the first paragraph under the
    # second heading — confirm against the live page.
    print(paras[3].get_text())


# Run main() only when this snippet is executed as a script.
if __name__ == "__main__":
    main()


第六步

import urllib.request
import re
from bs4 import BeautifulSoup

def summary(soup):
    """Print the entry title (plus subtitle, if present) and its summary.

    Args:
        soup: the parsed page. It must expose ``h1`` and ``h2`` attributes
            whose values have a ``text`` attribute, and a ``find`` method
            accepting a ``class_`` keyword.
    """
    word = soup.h1.text
    # Append the subtitle BEFORE printing; previously it was appended after
    # the print call and therefore never appeared in the output.
    if soup.h2:
        word += soup.h2.text
    print(word)
    # Look the summary element up once and reuse the result.
    lemma = soup.find(class_="lemma-summary")
    if lemma:
        print(lemma.text)
def main():
    """Look up a keyword on Baidu Baike and print headings, paragraphs, summary.

    Prints, in order: the ``content`` attribute of the fourth ``<meta>`` tag,
    a separator line, the third ``<h2>`` text with the first ``para``
    element's text, the fourth ``<h2>`` text with the fourth ``para``
    element's text, another separator line, and finally whatever
    ``summary(soup)`` prints.

    Raises:
        IndexError: if the page has fewer than four ``<meta>``, ``<h2>`` or
            ``para`` elements.
        KeyError: if the selected ``<meta>`` tag has no ``content`` attribute.
        AttributeError: if no element with class ``para`` exists
            (``soup.find`` then returns ``None``).
    """
    # Imported explicitly: only urllib.request is imported at file level, and
    # relying on it to load urllib.parse as a side effect is fragile.
    from urllib.parse import urlencode

    keyword = input("请输入关键词:")
    query = urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % query
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")

    meta_tags = soup.find_all("meta")
    # NOTE(review): index 3 depends on the page layout; presumably this is
    # the description <meta> tag — confirm against the live page.
    print(meta_tags[3]["content"])
    print("-->"*30)

    headings = soup.find_all("h2")
    # Indices 2 and 3 are position-dependent; the task list above treats them
    # as the first and second catalogue headings — verify on the live page.
    first_heading = headings[2]
    print(first_heading.get_text()+":")
    first_para = soup.find(class_="para")
    print(first_para.get_text())

    second_heading = headings[3]
    print(second_heading.get_text()+":")
    # Separate name for the list, so a single tag and a result list are not
    # stored in the same variable.
    paras = soup.find_all(class_="para")
    # NOTE(review): index 3 is assumed to be the first paragraph under the
    # second heading — confirm against the live page.
    print(paras[3].get_text())
    print("-->"*30)
    summary(soup)


# Run main() only when this snippet is executed as a script.
if __name__ == "__main__":
    main()

第七步

import urllib.request
import re
from bs4 import BeautifulSoup

def summary(soup):
    """Print the entry title (plus subtitle, if present) and its summary.

    Args:
        soup: the parsed page. It must expose ``h1`` and ``h2`` attributes
            whose values have a ``text`` attribute, and a ``find`` method
            accepting a ``class_`` keyword.
    """
    title = soup.h1.text
    # Append the subtitle BEFORE printing; previously it was appended after
    # the print call and therefore never appeared in the output.
    if soup.h2:
        title += soup.h2.text
    print(title)
    # Look the summary element up once and reuse the result.
    lemma = soup.find(class_="lemma-summary")
    if lemma:
        print(lemma.text)
def body():
    """Look up a keyword on Baidu Baike and print headings, paragraphs, summary.

    Prints, in order: the ``content`` attribute of the fourth ``<meta>`` tag,
    a separator line, the third ``<h2>`` text with the first ``para``
    element's text, the fourth ``<h2>`` text with the fourth ``para``
    element's text, another separator line, and finally whatever
    ``summary(soup)`` prints.

    Raises:
        IndexError: if the page has fewer than four ``<meta>``, ``<h2>`` or
            ``para`` elements.
        KeyError: if the selected ``<meta>`` tag has no ``content`` attribute.
        AttributeError: if no element with class ``para`` exists
            (``soup.find`` then returns ``None``).
    """
    # Imported explicitly: only urllib.request is imported at file level, and
    # relying on it to load urllib.parse as a side effect is fragile.
    from urllib.parse import urlencode

    keyword = input("请输入关键词:")
    query = urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % query
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")

    meta_tags = soup.find_all("meta")
    # NOTE(review): index 3 depends on the page layout; presumably this is
    # the description <meta> tag — confirm against the live page.
    print(meta_tags[3]["content"])
    print("-->"*30)

    headings = soup.find_all("h2")
    # Indices 2 and 3 are position-dependent; the task list above treats them
    # as the first and second catalogue headings — verify on the live page.
    first_heading = headings[2]
    print(first_heading.get_text()+":")
    first_para = soup.find(class_="para")
    print(first_para.get_text())

    second_heading = headings[3]
    print(second_heading.get_text()+":")
    # Separate name for the list, so a single tag and a result list are not
    # stored in the same variable.
    paras = soup.find_all(class_="para")
    # NOTE(review): index 3 is assumed to be the first paragraph under the
    # second heading — confirm against the live page.
    print(paras[3].get_text())
    print("-->"*30)

    summary(soup)
def main():
    """Entry point of the script: delegates all work to body()."""
    body()
   

  
# Run main() only when this snippet is executed as a script.
if __name__ == "__main__":
    main()

猜你喜欢

转载自blog.csdn.net/qq_51598376/article/details/112361544