本文介绍如何用 Python 通过 adb 连接手机,爬取支付宝移动端的账单信息。操作过程如下:在账单列表页点击一条账单进入详情页,对详情页截图并识别其中的文字,保存后回到列表页,再向下滑动处理下一条。
于是整个程序可以分解为如下四个主要功能:
1.图片识别;2.模拟点击;3.模拟滑动;4.截图功能
一.对账单详情页的处理:
对账单详情页截图后,再对截图进行分割,得到 3 张子图,分别对应:名称字段、账单数额字段、账单编号字段。
二.对账单缩略页的处理:
对于账单缩略页,我一开始的想法是:传入一个基准坐标,然后每次向下偏移一个固定的距离进行点击,以获取详情页的数据。但这样每次只能处理当前一屏的账单,而且无法确定遍历应该在何时结束。于是我改用"每读完一条数据就向下滚动一条"的方法,逐条遍历账单。
三.对每月账单的筛选:
在每次向下滚动之前,先加上一个判断条件:对列表页截图并分割,如果分割出的图片中识别出含有'月'或者'目'的文字,就判定当月账单已经结束,跳出循环;否则向下滚动,并模拟点击下一条账单。
# -*- coding: utf-8 -*-
import os
import math
import matplotlib.pyplot as plt
import pytesseract
from PIL import Image
import time
import csv
import numpy as np
import matplotlib.image as mpimg
class Pay_ali:
    """Scrape Alipay bill details from an Android device driven over adb.

    For every bill the class taps the entry in the bill list, screenshots
    the detail page, crops three regions out of the screenshot, runs OCR on
    each crop and appends the three recognised strings as one CSV row.  It
    then leaves the detail page and scrolls the list by one entry.

    All screen positions are hard-coded pixel coordinates.  They presumably
    match the author's handset only and must be re-tuned for other screen
    sizes (see the class constants below).
    """

    # Tap sent after a detail page has been saved.  Presumably the on-screen
    # "back" control -- confirm on the target device.
    _LEAVE_DETAIL_TAP = (302.8, 2100.3)
    # Taps sent once at start-up, in order.  Presumably they navigate to the
    # bill list and open the first bill -- confirm on the target device.
    _STARTUP_TAPS = ((972.8, 1954.93), (254, 714), (240, 732))

    # File names used for the detail page and for the list page.
    _DETAIL_SCREENSHOT = 'screen1.png'
    _DETAIL_CROP = 'crop1.png'
    _LIST_SCREENSHOT = 'screen2.png'
    _LIST_CROP = 'crop2.png'

    # Crop boxes (left, upper, right, lower) on the detail-page screenshot.
    _NAME_BOX = (482, 258, 693, 332)
    _MONEY_BOX = (305, 367, 747, 492)
    _MONEY_INFO_BOX = (20, 646, 1061, 1709)

    # OCR substrings that mark the end of the current month in the list.
    _MONTH_MARKERS = ('月', '目')

    def __init__(self):
        # None of these attributes is read anywhere in this class; they are
        # kept so that external code touching them keeps working.
        self._coefficient = 1.35
        self._click_count = 0
        self._coords = []

    @staticmethod
    def _adb_command(*args):
        """Return the shell command line ``adb <args...>`` as one string."""
        return ' '.join(['adb'] + [str(arg) for arg in args])

    @classmethod
    def _is_month_header(cls, text):
        """Return True when *text* contains one of the month markers."""
        return any(marker in text for marker in cls._MONTH_MARKERS)

    @staticmethod
    def _append_row(csv_path, row):
        """Append *row* to the CSV file *csv_path*, creating it if needed.

        ``newline=''`` is what the csv module requires so that embedded
        line breaks in OCR text survive and no blank lines are inserted on
        Windows.  UTF-8 is forced so Chinese text is written identically on
        every platform instead of depending on the locale encoding.
        """
        with open(csv_path, 'a', newline='', encoding='utf-8') as handle:
            csv.writer(handle).writerow(row)

    def print_all(self, text):
        """Print *text*, then print a newline character as a separator."""
        print(text)
        print('\n')

    def img_rec(self, img):
        """OCR the image file *img* and return the recognised text.

        Tesseract is run with the ``chi_sim`` language data.  The text is
        also echoed through :meth:`print_all`.
        """
        # The context manager closes the file handle that Image.open keeps.
        with Image.open(img) as picture:
            text = pytesseract.image_to_string(picture, lang='chi_sim')
        self.print_all(text)
        return text

    def acquire_info(self, x1, y1):
        """Send a tap at screen position (*x1*, *y1*) through adb."""
        os.system(self._adb_command('shell', 'input', 'tap', x1, y1))

    def acquire_swipe(self, x1, y1, x2, y2):
        """Send a swipe from (*x1*, *y1*) to (*x2*, *y2*) through adb."""
        os.system(self._adb_command('shell', 'input', 'swipe', x1, y1, x2, y2))

    def cut_info(self, path):
        """Take a device screenshot and pull it to the host.

        The screenshot is stored on the device as ``/sdcard/<path>`` and
        then fetched with ``adb pull`` (no destination is given, so adb
        chooses where the file lands -- normally the working directory).
        """
        remote = '/sdcard/' + str(path)
        os.system(self._adb_command('shell', 'screencap', '-p', remote))
        os.system(self._adb_command('pull', remote))

    def seg_info(self, x1, y1, x2, y2, path, out_path):
        """Crop a box out of image *path*, save it, and OCR the crop.

        Args:
            x1, y1, x2, y2: left, upper, right and lower pixel bounds.
            path: image file to crop from.
            out_path: file the crop is written to.  An existing file of
                that name is overwritten.

        Returns:
            The text recognised in the cropped image.
        """
        out_path = str(out_path)
        with Image.open(str(path)) as screenshot:
            screenshot.crop((x1, y1, x2, y2)).save(out_path)
        return self.img_rec(out_path)

    def img_show(self, src):
        """Display the image file *src* in a matplotlib window."""
        with Image.open(src) as picture:
            plt.imshow(picture)
            plt.show()

    def _read_detail_field(self, box):
        """OCR one (left, upper, right, lower) box of the detail screenshot."""
        left, upper, right, lower = box
        return self.seg_info(left, upper, right, lower,
                             self._DETAIL_SCREENSHOT, self._DETAIL_CROP)

    def _save_open_detail(self):
        """Screenshot the detail page now shown, save it, then leave it.

        The one-second pauses give the device time to finish each screen
        transition before the next adb command is sent.
        """
        time.sleep(1)
        self.cut_info(self._DETAIL_SCREENSHOT)
        time.sleep(1)
        self.csv_save()
        time.sleep(1)
        self.acquire_info(*self._LEAVE_DETAIL_TAP)

    def acquire_info_month(self, start, end, max_items=None):
        """Walk the bill list entry by entry until a month marker shows up.

        On each pass the list page is screenshotted and the box
        ``(23, start, 200, start + 250)`` is OCR'd.  If the text contains
        '月' or '目' the walk stops.  Otherwise the list is swiped up by
        220 pixels, the entry at ``(240, start + 100)`` is tapped and its
        detail page is saved.

        Args:
            start: upper pixel bound of the list area that is inspected.
            end: only compared with *start*; nothing happens unless
                ``end - start >= 100``.  Neither value changes inside the
                loop, so this acts as a precondition.
            max_items: optional upper limit on the number of bills saved by
                this call.  ``None`` (the default) means no limit, in which
                case the walk ends only when a month marker is recognised.
        """
        saved = 0
        while (end - start) >= 100 and (max_items is None or saved < max_items):
            time.sleep(1)
            self.cut_info(self._LIST_SCREENSHOT)
            text = self.seg_info(23, start, 200, start + 250,
                                 self._LIST_SCREENSHOT, self._LIST_CROP)
            if self._is_month_header(text):
                break
            self.acquire_swipe(23, start + 220, 23, start)
            self.acquire_info(240, start + 100)
            self._save_open_detail()
            saved += 1

    def csv_save(self, csv_path='pay_ali.csv'):
        """OCR the three detail-page fields and append them as one CSV row.

        Reads ``screen1.png``, which must already have been captured.  The
        three crops share the scratch file ``crop1.png``; that is safe
        because each crop is recognised before the next one overwrites it.

        Args:
            csv_path: CSV file to append to.
        """
        name = self._read_detail_field(self._NAME_BOX)
        money = self._read_detail_field(self._MONEY_BOX)
        money_info = self._read_detail_field(self._MONEY_INFO_BOX)
        self._append_row(csv_path, (name, money, money_info))

    def run(self):
        """Save the first bill, then continue through the rest of the month."""
        for x, y in self._STARTUP_TAPS:
            self.acquire_info(x, y)
        self._save_open_detail()
        self.acquire_info_month(475, 2023)
if __name__ == "__main__":
    # Script entry point: drive the connected device through one full run.
    pay_ali = Pay_ali()
    pay_ali.run()
    # Debugging aid: capture and view a single screenshot instead of a run.
    # pay_ali.cut_info('screen_test.png')
    # pay_ali.img_show('screen_test.png')
整个小demo实现难度不大,现在存在的问题在于如何适配不同的机型,欢迎拍砖~