Python3练习装饰器

# Decorator: add the page title to the dict produced by a page fetcher.
def requestsTextDecorator(func):
    """Wrap a page fetcher so that its result also carries the page title.

    ``func`` is expected to return a dict with the keys ``'picBool'`` and
    ``'content'`` (the HTML text). The wrapped function returns a dict with
    the keys ``'picBool'``, ``'title'`` and ``'content'``.

    Errors are reported by *returning* the exception object instead of
    raising it: an exception instance returned by ``func`` is handed back
    unchanged, and a failure while extracting the title (for example no
    ``<title>`` element, which yields an ``IndexError``) returns the
    exception that was caught.
    """
    import functools
    import re

    # DOTALL lets a title span several lines, IGNORECASE accepts <TITLE>,
    # and [^>]* tolerates attributes on the opening tag.
    title_pattern = re.compile(r'<title\b[^>]*>(.*?)</title>',
                               re.IGNORECASE | re.DOTALL)

    @functools.wraps(func)
    def getHtml(*args, **kwargs):
        data = func(*args, **kwargs)
        if isinstance(data, Exception):
            # The fetch already failed: pass the real error on rather than
            # masking it with the TypeError that subscripting would raise.
            return data
        try:
            title = title_pattern.findall(data['content'])[0].strip()
        except Exception as e:
            return e
        return {'picBool': data['picBool'], 'title': title, 'content': data['content']}
    return getHtml
# Decorator: collect the on-site image links or anchor links of a fetched page.
def requestsLinksDecorator(func):
    """Wrap a page fetcher so that it returns the page's on-site links.

    ``func`` is expected to return a dict with the keys ``'picBool'``,
    ``'title'`` and ``'content'`` (the HTML text). The page URL is taken from
    the first positional argument of the call, or from the ``url`` keyword.

    When ``data['picBool']`` is true the ``<img src="...">`` targets are
    collected, otherwise the ``<a href="...">`` targets. Every target is
    resolved against the page URL; links to another host and links back to
    the page itself are dropped.

    Returns:
        ``{'title': <page title>, 'links': [<absolute URL>, ...]}``.

    Raises:
        The exception instance returned by ``func`` if it reported a failure
        that way, or ``KeyError`` if the dict lacks an expected key.
    """
    import functools
    import re
    from urllib.parse import urljoin, urlsplit

    @functools.wraps(func)
    def getLinks(*args, **kwargs):
        data = func(*args, **kwargs)
        if isinstance(data, Exception):
            # The wrapped fetcher signals failure by returning the exception;
            # surface it instead of failing later on an unrelated error.
            raise data

        base_url = args[0] if args else kwargs['url']
        base_host = urlsplit(base_url).netloc
        if data['picBool']:
            pattern = r'<img src="(.*?)".*?>'
        else:
            pattern = r'<a href="(.*?)".*?>'

        result_links = []
        for link in re.findall(pattern, data['content']):
            # urljoin handles '/abs', 'relative.html', '//host/x' and full
            # URLs alike, with or without a trailing slash on the base.
            absolute = urljoin(base_url, link)
            if urlsplit(absolute).netloc != base_host:
                continue  # off-site, or a non-web scheme such as javascript:
            if absolute.rstrip('/') == base_url.rstrip('/'):
                continue  # link back to the page itself
            result_links.append(absolute)
        return {'title': data['title'], 'links': result_links}
    return getLinks

# Thin wrapper around requests: takes a URL, detects the encoding and yields
# the page HTML.
@requestsLinksDecorator
@requestsTextDecorator
def requestsEncapsulation(url, picBool=False, **kwargs):
    """Download ``url`` and return its HTML text together with ``picBool``.

    This describes the undecorated function; as written it is wrapped by
    ``requestsTextDecorator`` and then ``requestsLinksDecorator``, so callers
    receive whatever those wrappers produce from this result.

    Args:
        url: Address of the page to fetch.
        picBool: Flag copied (as a real ``bool``) into the result dict.
        **kwargs: If any are given they are sent as the HTTP request headers;
            otherwise a default ``User-Agent`` header is sent.

    Returns:
        ``{'picBool': <bool>, 'content': <html text>}`` on success. On
        failure the caught exception object is *returned*, not raised.
    """
    if kwargs:
        headers = kwargs
    else:
        # The header value must not repeat the header name itself.
        headers = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'}
    try:
        import requests
        # Up to five attempts; only a non-200 status triggers another try,
        # an exception from requests.get ends the loop immediately.
        for i in range(5):
            # Pass the headers chosen above (previously the raw kwargs were
            # sent, so the default User-Agent was never used).
            res = requests.get(url, headers=headers, timeout=5)
            if res.status_code == 200:
                break
            elif i == 4:
                raise Exception('链接打开失败!')
        # Decode the body with the encoding detected from its content.
        res.encoding = res.apparent_encoding
    except Exception as e:
        return e
    return {'picBool': bool(picBool), 'content': res.text}
# Demo run, executed at module level (so also on import): request the runoob.com
# home page with picBool=False, i.e. collecting <a href> links, and print the result.
data = requestsEncapsulation('http://www.runoob.com/', picBool=False)
print(data)

猜你喜欢

转载自blog.csdn.net/weixin_43690548/article/details/88835148