版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
urlsplit、urljoin
from urllib.parse import urlsplit, urljoin

# Show what urlsplit produces for a query URL (magenta terminal output).
print('\033[035m{}\033[0m\n'.format(urlsplit('https://www.baidu.com/s?ie=UTF-8&wd=scrapy')))


def join_url(url, postfix, real):
    """Join ``postfix`` onto the scheme+host of ``url`` and compare with ``real``.

    Prints a small diagnostic table (yellow labels) and returns the joined
    URL so callers can use the result programmatically.

    :param url: a full URL whose scheme and netloc are kept
    :param postfix: an absolute URL or an absolute path to join
    :param real: the expected joined URL, for visual comparison
    :return: the URL produced by ``urljoin``
    """
    parts = urlsplit(url)
    # Use the named SplitResult attributes instead of positional indexing.
    domain = parts.scheme + '://' + parts.netloc
    url_total = urljoin(domain, postfix)
    # Print each value directly — the original used eval() on label strings,
    # which is an unnecessary security/readability anti-pattern.
    rows = [
        ('url', url),
        ('domain', domain),
        ('url_total', url_total),
        ('real', real),
        ('url_total==real', url_total == real),
    ]
    for label, value in rows:
        print('\033[033m%-15s\033[0m' % label, value)
    print()
    return url_total


ls = [
    ('https://blog.csdn.net/Yellow_python',
     'https://blog.csdn.net/Yellow_python/article/details/94435972',
     'https://blog.csdn.net/Yellow_python/article/details/94435972'),
    ('https://k.autohome.com.cn/314/#pvareaid=2099126',
     '/spec/36144/',
     'https://k.autohome.com.cn/spec/36144/'),
]
for url, postfix, real in ls:
    join_url(url, postfix, real)
函数(复制用)
from urllib.parse import urlsplit, urljoin


def join_url(url, postfix):
    """Return ``postfix`` joined onto the scheme and host taken from ``url``."""
    parts = urlsplit(url)
    base = '{}://{}'.format(parts.scheme, parts.netloc)
    return urljoin(base, postfix)


url = 'https://github.com/AryeYellow'
postfix = '/AryeYellow/NLP'
url_total = join_url(url, postfix)
print(url_total)
parse
from urllib import parse

keyword = 'Python爬虫'

# URL-encode a mapping into a query string (key=value&key=value).
dic = {"wd": keyword, 'q': 120}
wd = parse.urlencode(dic)
print(wd)

# Percent-encode a single string value.
quote = parse.quote(keyword)
print(quote)

# Decode a percent-encoded string back to the original text.
unquote = parse.unquote(quote)
print(unquote)
-
wd=Python%E7%88%AC%E8%99%AB&q=120
Python%E7%88%AC%E8%99%AB
Python爬虫
函数(复制用)
from urllib import parse


def encode_url(url, dt):
    """Append the url-encoded form of mapping ``dt`` to ``url`` and return it."""
    query = parse.urlencode(dt)
    return '{}{}'.format(url, query)


url = 'https://www.baidu.com/s?'
keyword = 'Python爬虫'
dt = {'ie': 'UTF-8', 'wd': 'K房'}
print(encode_url(url, dt))  # https://www.baidu.com/s?ie=UTF-8&wd=K%E6%88%BF