版权声明:本文为博主原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
对得到App电子书进行抓取,数据分析最受欢迎的电子书:
存储到MongoDB时遇到pymongo导包错误,应该是一开始建文件时未引入包,放到包文件目录下可以执行。
目前先存txt文件,再通过MongoDB导入,有点麻烦。
mitmdump代码:
from mitmproxy import ctx
import json
# https://blog.csdn.net/yyz_yinyuanzhang/article/details/83930065
# 单独存储,导入pymongo错误:
def write_to_file(content, path='books.txt'):
    """Append *content* to a JSON-lines file, one JSON document per line.

    Args:
        content: Any JSON-serialisable object (here, one book record).
        path: Destination file; defaults to ``books.txt`` in the current
            working directory. The file is created if it does not exist.
    """
    # ensure_ascii=False keeps non-ASCII text (e.g. Chinese titles) readable
    # in the file instead of emitting \uXXXX escapes.
    line = json.dumps(content, ensure_ascii=False) + '\n'
    with open(path, 'a', encoding='utf-8') as f:
        f.write(line)
def response(flow):
    """mitmproxy response hook: log a summary of every book in a list reply.

    Only responses whose request URL starts with the e-book list endpoint
    are inspected; everything else is ignored. For each entry under
    ``data['c']['list']`` a flat ``item`` dict is built and written to the
    mitmproxy log.

    Missing keys are tolerated: a reply without ``c``/``list`` yields no
    items, and a book lacking a field gets ``None`` for it, so one
    incomplete record no longer aborts the rest of the batch.

    Args:
        flow: The mitmproxy HTTP flow whose response has just arrived.
    """
    url = 'https://entree.igetget.com/ebook2/v1/ebook/list'
    if not flow.request.url.startswith(url):
        return

    text = flow.response.text
    if not text:
        # Empty or undecodable body: nothing to parse.
        return

    data = json.loads(text)
    payload = data.get('c') if isinstance(data, dict) else None
    books = payload.get('list') if isinstance(payload, dict) else None

    for book in books or []:
        item = {
            'title': book.get('operating_title'),
            'id': book.get('id'),
            'cover': book.get('cover'),
            # NOTE(review): 'createTime' is filled from 'other_share_summary',
            # exactly as in the original; confirm this mapping is intended.
            'createTime': book.get('other_share_summary'),
            'price': book.get('price'),
            'publish_time': book.get('publish_time'),
            'book_intro': book.get('book_intro'),
        }
        ctx.log.info(str(item))
        # Persisting is disabled for now; enable to append to books.txt:
        # write_to_file(item)
appium滑动:
import time
from selenium.webdriver.support.ui import WebDriverWait
from appium import webdriver
# Capabilities describing the Android device/emulator and the app to launch.
desired_caps = {
    'platformName': 'Android',
    'deviceName': '127.0.0.1:62001',
    'platformVersion': '5.1.1',
    'appPackage': 'com.luojilab.player',
    'appActivity': 'com.luojilab.business.welcome.SplashActivity',
    'noReset': True,
}

# Open a session against the Appium server listening on localhost.
driver = webdriver.Remote('http://127.0.0.1:4723/wd/hub', desired_caps)
def get_size():
    """Return the current window size as a ``(width, height)`` tuple.

    The size is fetched from the module-level ``driver`` with a single
    ``get_window_size()`` call, so both values come from the same snapshot
    and only one request is sent to the server.
    """
    window = driver.get_window_size()
    return (window['width'], window['height'])
# Second child of the category grid on the start screen
# (presumably the e-book section; confirm against the app's layout).
CATEGORY_XPATH = (
    "//android.widget.GridView[@resource-id='com.luojilab.player:id/categoryGridView']"
    "/android.widget.LinearLayout[2]"
)

try:
    # until() returns the located element or raises TimeoutException, so its
    # result can be clicked directly; no truthiness test or second lookup needed.
    category = WebDriverWait(driver, 3).until(
        lambda d: d.find_element_by_xpath(CATEGORY_XPATH)
    )
    category.click()

    size = get_size()
    x1 = int(size[0] * 0.5)   # horizontal centre of the screen
    y1 = int(size[1] * 0.9)   # swipe start: near the bottom
    y2 = int(size[1] * 0.15)  # swipe end: near the top

    # Swipe upwards forever so the app keeps requesting further list pages;
    # stop the script with Ctrl-C.
    while True:
        driver.swipe(x1, y1, x1, y2)
        time.sleep(0.1)
except KeyboardInterrupt:
    pass
finally:
    # Always release the Appium session, even on timeout or interruption.
    driver.quit()
存MongoDB数据库:分析
from pymongo import MongoClient
import json
# Load the JSON-lines dump written by the mitmproxy addon into MongoDB.
client = MongoClient('localhost', port=27017)
collection = client["fd"]["dedao_books1"]

# Read-only access is enough; the context manager closes the file, and
# iterating the handle streams the dump instead of loading it all at once.
with open('books.txt', 'r', encoding='utf-8') as f:
    for line in f:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        # insert_one replaces the deprecated Collection.insert
        # (removed in PyMongo 4).
        collection.insert_one(json.loads(line))