版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/qq_34272093/article/details/82703275
python-爬虫
# import urllib.request # import http.cookiejar # import pymysql # conn = pymysql.connect("localhost", "root", "123456", "test") # cursor = conn.cursor() # cursor.execute("DROP TABLE IF EXISTS employee") # sql = """CREATE TABLE employee(first_name CHAR(20) NOT NULL, # last_name CHAR(20), # age INT, # sex CHAR(1))""" # cursor.execute(sql) # sqlInsert = """INSERT INTO employee(first_name,last_name,age,sex) VALUES('李白','白居易',20,'男')""" # try: # cursor.execute(sqlInsert) # cursor.execute(sqlInsert) # conn.commit() # except: # conn.rollback() # conn.close() # 爬虫 # import requests # from bs4 import BeautifulSoup # import pymysql # # # 本地数据库 # sql_host = 'localhost' # # 数据库的用户名 # sql_user = 'root' # # 数据库密码 # sql_password = '123456' # # 数据的名 # sql_name = 'test' # SQL_INSERT = """INSERT INTO user_data(author,page,sex,age,vote,content) VALUES(%s,%s,%s,%s,%s,%s)""" # # def download_page(http_url): # headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:61.0) Gecko/20100101 Firefox/61.0"} # call_back = requests.get(http_url, headers=headers) # return call_back.text # # # def get_page_content(html, page): # conn = pymysql.connect(sql_host, sql_user, sql_password, sql_name) # cursor = conn.cursor() # soup = BeautifulSoup(html, 'html.parser') # con = soup.find(id='content-left') # con_list = con.find_all('div', class_='article') # for item in con_list: # author = item.find('h2').string # content = item.find('div', class_='content').find('span').get_text() # stats = item.find('div', class_='stats') # vote = stats.find('span', class_='stats-vote').find('i', class_='number').get_text() # comments = stats.find('span', class_='stats-comments').find('i', class_='number').string # author_info = item.find('div', class_='articleGender') # if author_info is not None: # class_list = author_info['class'] # age = author_info.string # if 'womenIcon' in class_list: # sex = '女' # elif 'manIcon' in class_list: # sex = '男' # else: # sex = '' # else: # sex = '' # age = '' # # 
import requests
from bs4 import BeautifulSoup
import pymysql  # unused in this version; kept because the commented-out DB variant above references it

# Sample of the markup being parsed: <div class="articleGender manIcon">20</div>
USER_AGENT = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:62.0) Gecko/20100101 Firefox/62.0'


def get_html(url):
    """Fetch *url* and return the response body as text.

    A browser User-Agent is sent because the target site rejects the
    default requests UA; the timeout keeps the script from hanging
    forever on a dead connection.
    """
    headers = {'User-Agent': USER_AGENT}
    call_back = requests.get(url, headers=headers, timeout=10)
    return call_back.text


def get_page_content(html, page):
    """Parse one listing page and print author, sex and age per article.

    html -- raw HTML of the page.
    page -- page number; unused here, kept for interface compatibility
            with the DB-writing variant (see commented-out code above).
    """
    soup = BeautifulSoup(html, 'html.parser')
    content = soup.find(id='content-left')
    if content is None:
        # Error page, or the site layout changed — nothing to parse.
        return
    content_list = content.find_all('div', class_='article')
    for list_item in content_list:
        author_div = list_item.find('div', class_='author')
        if author_div is None:
            continue
        author = author_div.find('h2').string
        author_info = author_div.find('div', class_='articleGender')
        if author_info is not None:
            # The gender is encoded in the div's CSS class, the age in its text.
            info_list = author_info['class']
            age = author_info.string
            if 'manIcon' in info_list:
                sex = '男'
            elif 'womenIcon' in info_list:
                sex = '女'
            else:
                sex = ''
        else:
            age = ''
            sex = ''
        print(author, sex, age)


if __name__ == '__main__':
    url = 'https://www.baidu.com/'
    html = get_html(url)
    get_page_content(html, 1)
java - spring_boot - mysql
数据类
/**
 * One scraped joke record, persisted by the Python crawler and served
 * via {@code /name/data}.
 *
 * Getters/setters are required so Jackson can serialize the entity to
 * JSON (the controller returns {@code findAll()} directly) and so JPA
 * can populate the fields.
 */
@Entity
public class UserData {

    /** Primary key. */
    @Id
    private int id;
    private String author;
    private String page;
    private String sex;
    private String age;
    private String vote;
    private String content;

    public int getId() { return id; }
    public void setId(int id) { this.id = id; }

    public String getAuthor() { return author; }
    public void setAuthor(String author) { this.author = author; }

    public String getPage() { return page; }
    public void setPage(String page) { this.page = page; }

    public String getSex() { return sex; }
    public void setSex(String sex) { this.sex = sex; }

    public String getAge() { return age; }
    public void setAge(String age) { this.age = age; }

    public String getVote() { return vote; }
    public void setVote(String vote) { this.vote = vote; }

    public String getContent() { return content; }
    public void setContent(String content) { this.content = content; }
}

/**
 * Repository for {@link UserData}.
 *
 * Fix: the ID type parameter must match the {@code @Id} field's type —
 * the field is {@code int}, so the wrapper {@code Integer} is used
 * (was {@code String}, which breaks findById/deleteById at runtime).
 */
@Repository
public interface User extends JpaRepository<UserData, Integer> {
}
接口控制器
/**
 * REST endpoint exposing the scraped data.
 *
 * Mapped at {@code /name}; {@code /name/data} returns every stored
 * {@link UserData} row as a JSON array.
 */
@RestController
@RequestMapping(value = "/name")
public class Data {

    /** Repository used to load all scraped records. */
    @Autowired
    private User userRepository;

    /**
     * Return all rows from {@code user_data}.
     *
     * @return every persisted {@link UserData} record
     */
    @RequestMapping(value = "/data")
    public List<UserData> name() {
        List<UserData> rows = userRepository.findAll();
        return rows;
    }
}