课后作业最后一题
制作搜索器
- [1] 打印词条内容
- [2] 允许用户输入搜索的关键词
- [3] 打印目录中第一个标题
- [4] 打印第一段内容
- [5] 打印第二个标题下的第一段内容
- [6] 打印标题、副标题和简介(即词条内容)
- [7] 简化代码
第一步
import urllib.request
import re
from bs4 import BeautifulSoup
def main():
    """Fetch a fixed Baidu Baike entry and print one ``<meta>`` content value.

    Downloads the hard-coded entry URL, parses it with BeautifulSoup and
    prints the ``content`` attribute of the fourth ``<meta>`` tag on the page.

    Raises:
        IndexError: if the page contains fewer than four ``<meta>`` tags.
        KeyError: if the fourth ``<meta>`` tag has no ``content`` attribute.
    """
    url = "http://baike.baidu.com/view/284853.htm"
    # Read the body inside ``with`` so the HTTP response is always closed.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")  # use Python's built-in parser

    # NOTE(review): the lookup is positional and therefore tied to the page
    # layout; presumably index 3 is the entry description - confirm.
    metas = soup.find_all("meta")
    print(metas[3]["content"])


if __name__ == "__main__":
    main()
第二步
import urllib.request
import re
from bs4 import BeautifulSoup
def main():
    """Ask for a keyword, search Baidu Baike and print one ``<meta>`` content.

    The keyword read from standard input is URL-encoded as the ``word`` query
    parameter of the search URL. The returned page is parsed with
    BeautifulSoup and the ``content`` attribute of the fourth ``<meta>`` tag
    is printed.

    Raises:
        IndexError: if the page contains fewer than four ``<meta>`` tags.
        KeyError: if the fourth ``<meta>`` tag has no ``content`` attribute.
    """
    # Imported explicitly: the file only imports ``urllib.request``, and
    # relying on it to expose ``urllib.parse`` is an implementation detail.
    import urllib.parse

    keyword = input("请输入关键词:")
    query = urllib.parse.urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % query

    # Read the body inside ``with`` so the HTTP response is always closed.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")

    # NOTE(review): positional lookup tied to the page layout; presumably
    # index 3 is the entry description - confirm against the live page.
    metas = soup.find_all("meta")
    print(metas[3]["content"])


if __name__ == "__main__":
    main()
第三步
import urllib.request
import re
from bs4 import BeautifulSoup
def main():
    """Search Baidu Baike for a keyword and print a meta value and a heading.

    Prints, in order:
      1. the ``content`` attribute of the fourth ``<meta>`` tag,
      2. a separator line made of ``"-->"`` repeated 30 times,
      3. the third ``<h2>`` tag itself (markup included).

    Raises:
        IndexError: if the page has fewer than four ``<meta>`` tags or fewer
            than three ``<h2>`` tags.
        KeyError: if the fourth ``<meta>`` tag has no ``content`` attribute.
    """
    # Imported explicitly: the file only imports ``urllib.request``, and
    # relying on it to expose ``urllib.parse`` is an implementation detail.
    import urllib.parse

    keyword = input("请输入关键词:")
    query = urllib.parse.urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % query

    # Read the body inside ``with`` so the HTTP response is always closed.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")

    # NOTE(review): all lookups below are positional and tied to the page
    # layout; confirm the indices against the live page.
    metas = soup.find_all("meta")
    print(metas[3]["content"])
    print("-->" * 30)

    # Presumably the third <h2> is the first catalogue heading - confirm.
    headings = soup.find_all("h2")
    print(headings[2])


if __name__ == "__main__":
    main()
第四步
import urllib.request
import re
from bs4 import BeautifulSoup
def main():
    """Search Baidu Baike and print a meta value, a heading and a paragraph.

    Prints, in order:
      1. the ``content`` attribute of the fourth ``<meta>`` tag,
      2. a separator line made of ``"-->"`` repeated 30 times,
      3. the text of the third ``<h2>`` tag followed by ``":"``,
      4. the text of the first element whose class is ``para``.

    Raises:
        IndexError: if the page has fewer than four ``<meta>`` tags or fewer
            than three ``<h2>`` tags.
        KeyError: if the fourth ``<meta>`` tag has no ``content`` attribute.
        AttributeError: if no element with class ``para`` exists.
    """
    # Imported explicitly: the file only imports ``urllib.request``, and
    # relying on it to expose ``urllib.parse`` is an implementation detail.
    import urllib.parse

    keyword = input("请输入关键词:")
    query = urllib.parse.urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % query

    # Read the body inside ``with`` so the HTTP response is always closed.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")

    # NOTE(review): all lookups below are positional and tied to the page
    # layout; confirm the indices against the live page.
    metas = soup.find_all("meta")
    print(metas[3]["content"])
    print("-->" * 30)

    headings = soup.find_all("h2")
    first_heading = headings[2]
    print(first_heading.get_text() + ":")

    first_para = soup.find(class_="para")
    print(first_para.get_text())


if __name__ == "__main__":
    main()
第五步
import urllib.request
import re
from bs4 import BeautifulSoup
def main():
    """Search Baidu Baike and print a meta value, two headings and paragraphs.

    Prints, in order:
      1. the ``content`` attribute of the fourth ``<meta>`` tag,
      2. a separator line made of ``"-->"`` repeated 30 times,
      3. the text of the third ``<h2>`` tag followed by ``":"``,
      4. the text of the first element whose class is ``para``,
      5. the text of the fourth ``<h2>`` tag followed by ``":"``,
      6. the text of the fourth element whose class is ``para``.

    Raises:
        IndexError: if the page has fewer than four ``<meta>`` tags, ``<h2>``
            tags or ``para`` elements.
        KeyError: if the fourth ``<meta>`` tag has no ``content`` attribute.
        AttributeError: if no element with class ``para`` exists.
    """
    # Imported explicitly: the file only imports ``urllib.request``, and
    # relying on it to expose ``urllib.parse`` is an implementation detail.
    import urllib.parse

    keyword = input("请输入关键词:")
    query = urllib.parse.urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % query

    # Read the body inside ``with`` so the HTTP response is always closed.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")

    # NOTE(review): all lookups below are positional and tied to the page
    # layout; confirm the indices against the live page.
    metas = soup.find_all("meta")
    print(metas[3]["content"])
    print("-->" * 30)

    headings = soup.find_all("h2")
    print(headings[2].get_text() + ":")
    first_para = soup.find(class_="para")
    print(first_para.get_text())

    print(headings[3].get_text() + ":")
    # Presumably the fourth "para" element is the first paragraph under the
    # second heading - confirm against the live page.
    paras = soup.find_all(class_="para")
    print(paras[3].get_text())


if __name__ == "__main__":
    main()
第六步
import urllib.request
import re
from bs4 import BeautifulSoup
def summary(soup):
    """Print the entry title (with subtitle, if any) and its introduction.

    Args:
        soup: parsed page object exposing ``h1``/``h2`` attributes and a
            ``find(class_=...)`` method (a ``BeautifulSoup`` instance).

    The title is the text of the first ``<h1>``; when an ``<h2>`` exists its
    text is appended to the title before printing. The introduction is the
    text of the first element with class ``lemma-summary`` and is printed
    only when such an element exists.
    """
    title = soup.h1.text
    # Append the subtitle BEFORE printing; printing first would silently
    # drop it from the output.
    if soup.h2 is not None:
        title += soup.h2.text
    print(title)

    intro = soup.find(class_="lemma-summary")
    if intro is not None:
        print(intro.text)
def main():
    """Search Baidu Baike, print selected page parts, then the entry summary.

    Prints, in order:
      1. the ``content`` attribute of the fourth ``<meta>`` tag,
      2. a separator line made of ``"-->"`` repeated 30 times,
      3. the text of the third ``<h2>`` tag followed by ``":"``,
      4. the text of the first element whose class is ``para``,
      5. the text of the fourth ``<h2>`` tag followed by ``":"``,
      6. the text of the fourth element whose class is ``para``,
      7. another separator line,
      8. whatever ``summary(soup)`` prints.

    Raises:
        IndexError: if the page has fewer than four ``<meta>`` tags, ``<h2>``
            tags or ``para`` elements.
        KeyError: if the fourth ``<meta>`` tag has no ``content`` attribute.
        AttributeError: if no element with class ``para`` exists.
    """
    # Imported explicitly: the file only imports ``urllib.request``, and
    # relying on it to expose ``urllib.parse`` is an implementation detail.
    import urllib.parse

    keyword = input("请输入关键词:")
    query = urllib.parse.urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % query

    # Read the body inside ``with`` so the HTTP response is always closed.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")

    # NOTE(review): all lookups below are positional and tied to the page
    # layout; confirm the indices against the live page.
    metas = soup.find_all("meta")
    print(metas[3]["content"])
    print("-->" * 30)

    headings = soup.find_all("h2")
    print(headings[2].get_text() + ":")
    first_para = soup.find(class_="para")
    print(first_para.get_text())

    print(headings[3].get_text() + ":")
    paras = soup.find_all(class_="para")
    print(paras[3].get_text())
    print("-->" * 30)

    summary(soup)


if __name__ == "__main__":
    main()
第七步
import urllib.request
import re
from bs4 import BeautifulSoup
def summary(soup):
    """Print the entry title (with subtitle, if any) and its introduction.

    Args:
        soup: parsed page object exposing ``h1``/``h2`` attributes and a
            ``find(class_=...)`` method (a ``BeautifulSoup`` instance).

    The title is the text of the first ``<h1>``; when an ``<h2>`` exists its
    text is appended to the title before printing. The introduction is the
    text of the first element with class ``lemma-summary`` and is printed
    only when such an element exists.
    """
    heading = soup.h1.text
    subtitle = soup.h2
    # The subtitle must be joined to the heading before the print call;
    # printing first would leave it out of the output entirely.
    if subtitle is not None:
        heading += subtitle.text
    print(heading)

    lemma_summary = soup.find(class_="lemma-summary")
    if lemma_summary is not None:
        print(lemma_summary.text)
def body():
    """Run one interactive search and print the selected parts of the page.

    Reads a keyword from standard input, requests the Baidu Baike search URL
    for it, parses the response with BeautifulSoup and prints, in order:
      1. the ``content`` attribute of the fourth ``<meta>`` tag,
      2. a separator line made of ``"-->"`` repeated 30 times,
      3. the text of the third ``<h2>`` tag followed by ``":"``,
      4. the text of the first element whose class is ``para``,
      5. the text of the fourth ``<h2>`` tag followed by ``":"``,
      6. the text of the fourth element whose class is ``para``,
      7. another separator line,
      8. whatever ``summary(soup)`` prints.

    Raises:
        IndexError: if the page has fewer than four ``<meta>`` tags, ``<h2>``
            tags or ``para`` elements.
        KeyError: if the fourth ``<meta>`` tag has no ``content`` attribute.
        AttributeError: if no element with class ``para`` exists.
    """
    # Imported explicitly: the file only imports ``urllib.request``, and
    # relying on it to expose ``urllib.parse`` is an implementation detail.
    import urllib.parse

    keyword = input("请输入关键词:")
    query = urllib.parse.urlencode({"word": keyword})
    url = "http://baike.baidu.com/search/word?%s" % query

    # Read the body inside ``with`` so the HTTP response is always closed.
    with urllib.request.urlopen(url) as response:
        html = response.read()
    soup = BeautifulSoup(html, "html.parser")

    # NOTE(review): all lookups below are positional and tied to the page
    # layout; confirm the indices against the live page.
    metas = soup.find_all("meta")
    print(metas[3]["content"])
    print("-->" * 30)

    headings = soup.find_all("h2")
    print(headings[2].get_text() + ":")
    first_para = soup.find(class_="para")
    print(first_para.get_text())

    print(headings[3].get_text() + ":")
    paras = soup.find_all(class_="para")
    print(paras[3].get_text())
    print("-->" * 30)

    summary(soup)
def main():
    """Script entry point; all of the work is delegated to ``body``."""
    body()


# Only run the search when executed as a script, not when imported.
if __name__ == "__main__":
    main()