import requests
import re
import json
from bs4 import BeautifulSoup
from urllib.request import urljoin
import collections
from pyecharts import Map
def get_view_history_link(search_word):
    """Return the absolute URL of the edit-history page of a Wikipedia article.

    Args:
        search_word: Article title as it appears in the URL path after
            ``/wiki/`` (for example ``'Python_(programming_language)'``).

    Returns:
        The absolute URL taken from the article page's history tab
        (the ``<li id="ca-history">`` element).

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the article page contains no history tab link.
    """
    root_url = 'https://en.wikipedia.org'
    url = root_url + '/wiki/' + search_word
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/64.0.3282.186 Safari/537.36'
        ),
    }
    # A timeout keeps the script from hanging forever on a stalled connection.
    res = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'lxml')

    history_tab = soup.find('li', {'id': 'ca-history'})
    # Look for the anchor anywhere below the tab so the lookup does not
    # depend on the exact <span>/<a> nesting inside it.
    history_anchor = None
    if history_tab is not None:
        history_anchor = history_tab.find('a', href=True)
    if history_anchor is None:
        raise ValueError(
            'No history link found on page: ' + url
        )
    return urljoin(root_url, history_anchor['href'])
def get_ip(search_word):
    """Collect the IP addresses of anonymous editors of a Wikipedia article.

    The article's history page is fetched, then the page behind the last
    ``mw-numlink`` (page-size) link on it, and the text of every
    ``mw-anonuserlink`` anchor on that page is collected.

    Args:
        search_word: Article title as it appears in the URL path after
            ``/wiki/``.

    Returns:
        A set with the text of each anonymous-user link (duplicates removed).

    Raises:
        requests.RequestException: If a request fails, times out, or the
            server answers with an HTTP error status.
    """
    root_url = 'https://en.wikipedia.org'
    # Send the same browser User-Agent that get_view_history_link uses so
    # all requests to the site are treated consistently.
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/64.0.3282.186 Safari/537.36'
        ),
    }

    history_url = get_view_history_link(search_word)
    res = requests.get(history_url, headers=headers, timeout=10)
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'lxml')

    page_size_links = soup.find_all('a', {'class': 'mw-numlink'})
    if page_size_links:
        # The last page-size link is presumably the largest one (500 edits
        # per page) -- TODO confirm against the live page markup.
        page_500_link = urljoin(root_url, page_size_links[-1]['href'])
        res = requests.get(page_500_link, headers=headers, timeout=10)
        res.raise_for_status()
        soup = BeautifulSoup(res.text, 'lxml')
    # Without any page-size link, fall back to the history page already
    # fetched instead of failing with an IndexError.

    return {
        anchor.text
        for anchor in soup.find_all('a', {'class': 'mw-anonuserlink'})
    }
def get_country(ip):
    """Look up the country code for an IP address via the freegeoip JSON API.

    Args:
        ip: IP address as a string; it is appended to the lookup URL.

    Returns:
        A tuple ``(ip, country_code)``. ``country_code`` is the value of the
        ``'country_code'`` field of the JSON response, or ``None`` when the
        request failed, the response was not a JSON object, or the field is
        missing.
    """
    # NOTE(review): verify that the freegeoip.net endpoint is still in
    # service; the URL is kept as-is here.
    try:
        res = requests.get('https://freegeoip.net/json/' + ip, timeout=10)
        data_json = json.loads(res.text)
    except (requests.RequestException, ValueError):
        # ValueError covers json.JSONDecodeError (non-JSON response body).
        # Return early: continuing would touch an unbound `res`.
        print('无效的ip地址')
        return (ip, None)

    if not isinstance(data_json, dict):
        # Valid JSON but not an object, so there is no field to read.
        return (ip, None)
    country = data_json.get('country_code')
    return (ip, country)
if __name__ == '__main__':
    # Resolve the country of every anonymous editor of the article.
    ips = get_ip('Python_(programming_language)')
    results = [get_country(ip) for ip in ips]

    # Build the lookup table from the local file: the first run of ASCII
    # letters on a line is the key, the remaining runs joined by spaces are
    # the value (presumably country code -> country name; verify against the
    # file's contents).
    country_ab = {}
    # The file is opened with the platform default encoding, as before.
    with open(r'C:\Users\CW\Desktop\ab.txt', 'r') as f:
        for line in f:
            words = re.findall('[a-zA-Z]+', line)
            if not words:
                # Blank line or a line without any ASCII letters.
                continue
            country_ab[words[0]] = ' '.join(words[1:])

    # Count edits per country, skipping lookups that failed (None) and codes
    # that are absent from the table; those would otherwise raise KeyError
    # when translated to names below.
    stats = collections.Counter(
        code for _, code in results if code in country_ab
    )
    # most_common() sorts by count in descending order.
    cut_stats = stats.most_common()
    attr = [country_ab[code] for code, _ in cut_stats]
    value = [count for _, count in cut_stats]

    word_map = Map("维基词条编辑每个国家贡献", width=800, height=400)
    word_map.add(
        "", attr, value, maptype="world", is_visualmap=True,
        is_piecewise=True, visual_text_color='#000', is_map_symbol_show=False,
        pieces=[
            {"max": 25, "min": 25, "label": "25"},
            {"max": 24, "min": 10, "label": "24-10"},
            {"max": 10, "min": 0, "label": "<10"},
        ],
    )