YY点触验证码识别案例
第一次请求该网站时,用 Selenium 定位不到验证码元素,最后发现原因是验证码位于 iframe 标签内。
我们需要取出 iframe 的 src 链接并再次请求,打开这个新页面之后就可以正常定位和操作验证码元素了。
#!/usr/bin/env python
# -*- coding:utf-8 -*-
#
# Demo: solve the click-type captcha on the YY registration page by
# screenshotting the page, cropping the captcha area, sending the crop to the
# Chaojiying recognition client and clicking the returned coordinates.

import os
import random
import time
from io import BytesIO

from PIL import Image
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as ec
from selenium.webdriver.support.ui import WebDriverWait

from chaojiying import Chaojiying

# NOTE(review): these credentials are committed in plain text. They are kept
# as fallbacks so existing behaviour is unchanged, but they can (and should)
# be supplied through the environment instead.
CHAOJIYING_NAME = os.environ.get('CHAOJIYING_NAME', 'dcxm2016')
CHAOJIYING_PWD = os.environ.get('CHAOJIYING_PWD', 'shuxue2016tongji')
CHAOJIYING_ID = int(os.environ.get('CHAOJIYING_ID', 894613))
CHAOJIYING_KIND = 9004


class YYVerification(object):
    """
    Drive the YY click-captcha flow: screenshot, crop, recognise, click.

    Recognition is delegated to the ``Chaojiying`` client imported above.
    """

    # XPath of the iframe that hosts the captcha on the registration page.
    IFRAME_XPATH = '/html/body/div[2]/div[2]/div/iframe'
    # XPath of the captcha image container inside the iframe document.
    CAPTCHA_XPATH = '//*[@id="mPickWords"]/div[1]'
    # XPath of the submit control inside the iframe document.
    SUBMIT_XPATH = '//*[@id="mPickWords"]/div[2]/span[4]'

    def __init__(self):
        self.url = 'https://aq.yy.com/p/reg/account.do?appid=&url=&fromadv=udbclsd_r'
        self.driver = webdriver.Chrome()
        self.chaojiying = Chaojiying(CHAOJIYING_NAME, CHAOJIYING_PWD, CHAOJIYING_ID)

    def __del__(self):
        # ``driver`` is missing when ``webdriver.Chrome()`` raised in __init__.
        driver = getattr(self, 'driver', None)
        if driver is not None:
            # quit() ends the whole WebDriver session (browser and driver
            # process); close() would only close the current window.
            driver.quit()

    def screen_shot(self):
        """
        Save a screenshot of the current page to ``yy.png``.

        :return: the value returned by ``save_screenshot`` (falsy on failure)
        """
        # Fixed delay before capturing; presumably lets the captcha render.
        time.sleep(2)
        return self.driver.save_screenshot('yy.png')

    def i_url(self):
        """
        Navigate to the iframe's ``src`` URL and wait for the captcha element.

        :return: the captcha container element located by ``CAPTCHA_XPATH``
        """
        # find_element(By.XPATH, ...) works on Selenium 3 and 4, whereas the
        # find_element_by_* helpers were removed in Selenium 4.3.
        frame = self.driver.find_element(By.XPATH, self.IFRAME_XPATH)
        frame_url = frame.get_attribute('src')
        print(frame_url)
        self.driver.get(frame_url)
        wait = WebDriverWait(self.driver, 20)
        return wait.until(
            ec.presence_of_element_located((By.XPATH, self.CAPTCHA_XPATH)))

    def shear_location(self):
        """
        Compute the crop box of the captcha inside the page screenshot.

        :return: ``((left, top, right, bottom), element)``
        """
        time.sleep(random.random() + 1)
        print('正在获取div')
        div = self.i_url()
        # Read the location once; every access is a WebDriver round-trip.
        location = div.location
        # Fixed 25/65 px offsets and a 350x218 box, as in the original.
        left = location['x'] + 25
        top = location['y'] + 65
        result = (left, top, left + 350, top + 218)
        print(result)
        return result, div

    @staticmethod
    def shear_image(axis):
        """
        Crop ``yy.png`` to ``axis`` and save the crop as ``new_img.png``.

        :param axis: ``(left, top, right, bottom)`` crop box
        :return: the cropped image
        """
        im = Image.open('yy.png')
        new_image = im.crop(axis)
        new_image.save('new_img.png')
        return new_image

    def upload_picture(self, img):
        """
        Upload the image as PNG bytes and return the click coordinates.

        :param img: image to upload
        :return: list of ``[x, y]`` integer pairs
        :raises ValueError: if the account has no credit left or the service
            returned no coordinates
        """
        byte_array = BytesIO()
        img.save(byte_array, format='PNG')
        # Submit the picture for recognition.
        result = self.chaojiying.post_pic(byte_array.getvalue(), CHAOJIYING_KIND)
        print(result)
        if '无可用题分' in result.get('err_str', ''):
            print('题分已经不足请充值')
            raise ValueError('题分已经不足请充值')
        pic_str = result.get('pic_str', '')
        if not pic_str:
            raise ValueError('no click coordinates returned: %r' % (result,))
        pic_list = []
        for pair in pic_str.split('|'):
            raw_x, raw_y = pair.split(',')
            # Rescale by the original fixed ratios; presumably maps picture
            # pixels to on-page pixels - TODO confirm.
            pic_list.append([
                int(int(raw_x) * (282 / 354)),
                int(int(raw_y) * (219 / 274)),
            ])
        print(pic_list)
        return pic_list

    def click_now(self, coordinates, axis, element):
        """
        Click every coordinate on the captcha element, then submit.

        :param coordinates: iterable of ``[x, y]`` offsets
        :param axis: crop box, forwarded to ``revalidation`` on failure
        :param element: captcha element the offsets are applied to
        :return: ``'点击成功'`` or ``'点击失败'``
        """
        print('点击开始')
        print(coordinates)
        for location in coordinates:
            # NOTE(review): newer Selenium 4 releases measure this offset from
            # the element's center, older ones from its top-left corner -
            # confirm which convention these coordinates assume.
            ActionChains(self.driver).move_to_element_with_offset(
                element, location[0], location[1]).click().perform()
            time.sleep(random.random() + 1.8)
        submission = self.driver.find_element(By.XPATH, self.SUBMIT_XPATH)
        submission.click()
        time.sleep(1)
        if 'pw_submit_disabled' in submission.get_attribute('class'):
            return '点击成功'
        time.sleep(2)
        # NOTE(review): retries recurse through revalidation() with no upper
        # bound, and this call still reports failure even if a retry succeeds.
        self.revalidation(axis, element)
        return '点击失败'

    def revalidation(self, axis, element):
        """
        Run one full attempt: screenshot, crop, recognise, click.

        :param axis: ``(left, top, right, bottom)`` crop box
        :param element: captcha element to click on
        :return: ``'截图失败'`` if the screenshot failed, else ``'点击验证结束'``
        """
        # Take a single screenshot per attempt.
        if not self.screen_shot():
            return '截图失败'
        time.sleep(1)
        # Crop the captcha out of the screenshot.
        new_image = self.shear_image(axis)
        new_image.show()
        # Upload the crop and get back the coordinates to click.
        click_coordinates = self.upload_picture(new_image)
        # Click the captcha.
        print(self.click_now(click_coordinates, axis, element))
        return '点击验证结束'

    def main(self):
        """Open the registration page, locate the captcha and solve it."""
        self.driver.get(self.url)
        wait = WebDriverWait(self.driver, 20)
        wait.until(ec.presence_of_element_located((By.XPATH, self.IFRAME_XPATH)))
        axis, element = self.shear_location()
        self.revalidation(axis, element)


if __name__ == '__main__':
    yy = YYVerification()
    yy.main()