使用 requests 登录 GitHub:
import requests
from bs4 import BeautifulSoup
def main():
    """Log in to GitHub with ``requests`` and print the response body.

    Fetches the login page, copies the hidden form fields into the login
    payload, posts it to ``/session`` and prints the returned HTML.
    Returns without printing anything when the login page does not answer
    with HTTP 200.

    Raises:
        RuntimeError: If the login form has no ``authenticity_token`` value.
        requests.RequestException: On connection failure or timeout.
    """
    timeout = 10  # seconds; without it a stalled connection hangs forever

    # A Session stores the cookies set by the login page and sends them
    # back with the form post, so they need not be copied by hand.
    with requests.Session() as session:
        resp = session.get('https://github.com/login', timeout=timeout)
        if resp.status_code != 200:
            return
        soup = BeautifulSoup(resp.text, 'lxml')

        data = {}

        # select_one() returns None when nothing matches, so the ``utf8``
        # marker is only forwarded when the form actually contains it.
        utf8_input = soup.select_one('form input[name=utf8]')
        if utf8_input is not None and utf8_input.get('value') is not None:
            data['utf8'] = utf8_input.get('value')

        # The token is mandatory for the post; fail with a clear message
        # instead of an AttributeError on None.
        token_input = soup.select_one('form input[name=authenticity_token]')
        token = token_input.get('value') if token_input is not None else None
        if not token:
            raise RuntimeError(
                'authenticity_token not found in the GitHub login form'
            )
        data['authenticity_token'] = token

        # NOTE(review): placeholder credentials hard-coded in the source;
        # real credentials should not be committed to a file.
        data['login'] = '[email protected]'
        data['password'] = '123456789'

        resp = session.post(
            'https://github.com/session', data=data, timeout=timeout
        )
        print(resp.text)


if __name__ == '__main__':
    main()
robobrowser 模拟浏览器登录 GitHub:
import robobrowser
def main():
    """Sign in to GitHub through RoboBrowser and print the page's links.

    Opens the login page, fills the ``login`` and ``password`` fields of
    the form whose action is ``/session``, submits it, and prints the
    ``href`` of every anchor found in the browser's current page.
    """
    browser = robobrowser.RoboBrowser(parser='lxml')
    browser.open('https://github.com/login')

    login_form = browser.get_form(action='/session')
    credentials = (
        ('login', '[email protected]'),
        ('password', '123456789'),
    )
    for field_name, field_value in credentials:
        login_form[field_name].value = field_value
    browser.submit_form(login_form)

    # The selector only matches anchors that carry an href attribute.
    anchors = browser.select('a[href]')
    for anchor in anchors:
        print(anchor.attrs['href'])


if __name__ == '__main__':
    main()
尝试用robobrowser模拟浏览器获取淘女郎动态生成图片(失败):
import robobrowser
def main():
    """Open the Taobao live-content page in RoboBrowser and print its images.

    Prints every ``<img>`` tag with a ``src`` attribute found in the HTML
    that RoboBrowser received.

    NOTE: the heading of this section in the file marks this attempt as
    failing to obtain the dynamically generated images.
    """
    page_url = 'https://v.taobao.com/v/content/live?catetype=704&from=taonvlang'

    browser = robobrowser.RoboBrowser(parser='lxml')
    browser.open(page_url)

    image_tags = browser.select('img[src]')
    for tag in image_tags:
        print(tag)


if __name__ == '__main__':
    main()
selenium测试工具模拟浏览器获取淘女郎动态图片(成功):
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
def main():
    """Drive Chrome through Selenium and print image URLs from Taobao.

    Opens the Taobao live-content page, types a keyword into the search
    box, presses ENTER, then parses the rendered page source and prints
    the ``src`` of every ``<img>`` tag that has one.

    The browser is always shut down, even when a step fails.
    """
    # Function-scope import: the generic find_element(By, value) locator
    # replaces find_element_by_css_selector, which was deprecated and then
    # removed in Selenium 4.
    from selenium.webdriver.common.by import By

    driver = webdriver.Chrome()
    try:
        driver.get(
            'https://v.taobao.com/v/content/live?catetype=704&from=taonvlang'
        )
        search_box = driver.find_element(
            By.CSS_SELECTOR, 'input[placeholder=输入关键词搜索]'
        )
        search_box.send_keys('运动')
        search_box.send_keys(Keys.ENTER)
        # NOTE(review): the page source is read immediately after ENTER;
        # if results load asynchronously an explicit wait may be needed.
        soup = BeautifulSoup(driver.page_source, 'lxml')
    finally:
        # Without quit() the Chrome window and driver process are leaked.
        driver.quit()

    for img_tag in soup.select('img[src]'):
        print(img_tag.attrs['src'])


if __name__ == '__main__':
    main()
图像处理与图像识别
from PIL import Image, ImageFilter
from io import BytesIO
from pytesseract import image_to_string
import requests
def main():
    """Demonstrate Pillow image processing and Tesseract OCR.

    Performs three independent steps:

    1. Apply a Gaussian blur to ``guido.jpg`` and save it as ``guido2.jpg``.
    2. Binarize ``aliwangwang.png`` with a threshold of 128, save the result
       as ``aliwangwang2.png`` and print the text recognized in it.
    3. Download a captcha image, save it as ``hello.jpg`` and print the
       text recognized in it.

    Raises:
        FileNotFoundError: If an input image is missing.
        requests.RequestException: On network failure, timeout, or a
            non-success HTTP status for the captcha download.
    """
    # Step 1: blur. The context manager closes the source file, and saving
    # by path lets Pillow open and close the destination itself.
    with Image.open('guido.jpg') as photo:
        blurred = photo.filter(ImageFilter.GaussianBlur())
    blurred.save('guido2.jpg')

    # Step 2: binarize, then OCR the binarized image. The result is used
    # directly instead of opening 'aliwangwang2.png' before it has been
    # written, which failed on a first run.
    with Image.open('aliwangwang.png') as source:
        binarized = source.point(lambda x: 0 if x < 128 else 255)
    binarized.save('aliwangwang2.png')
    print(image_to_string(binarized))

    # Step 3: fetch the captcha. The timeout avoids hanging forever, and
    # raise_for_status() reports an HTTP error instead of handing an error
    # page to the image decoder.
    resp = requests.get(
        'http://pin2.aliyun.com/get_img?type=150_40&identity=mailsso.mxhichina.com&sessionid=k0xHyBxU3K3dGXb59mP9cdeTXxL9gLHSTKhRZCryHxpOok4L',
        timeout=10,
    )
    resp.raise_for_status()
    with Image.open(BytesIO(resp.content)) as captcha:
        captcha.save('hello.jpg')
        print(image_to_string(captcha))


if __name__ == '__main__':
    main()