写在前面
- 批量处理 pandas.DataFrame 的行列数据
- 2020-02-12 备注:目前的实现效率不太高
代码实现
import re
import numpy as np
import pandas as pd
"""
只要字符串中的中文,数字
"""
def hq_zw_zm_sz(ss):
    """Keep only the Chinese characters and ASCII digits of a string.

    Every run of characters in the range U+4E00-U+9FA5 or ``0-9`` is
    extracted and the runs are concatenated in their original order.
    Everything else (whitespace, punctuation, ASCII letters, ...) is dropped.

    Args:
        ss: The value to clean. Anything that is not a ``str`` (``None``,
            a NaN of any kind, numbers, ...) is treated as "no data".

    Returns:
        The concatenated Chinese characters and digits, or ``None`` when
        the input is not a string or contains no such characters.
    """
    # An identity test against np.nan misses other NaN objects
    # (float('nan'), pd.NA, ...), which would then make re.findall raise
    # TypeError; a type check covers every non-string case at once.
    if not isinstance(ss, str):
        return None
    kept = re.findall(r'[\u4e00-\u9fa50-9]+', ss)
    return "".join(kept) if kept else None
if __name__ == '__main__':
    # Quick sanity checks of the cleaner on hand-written samples.
    str1 = ' 武\n汉加\t油,中\th国\n加\t油!'
    print(hq_zw_zm_sz(str1))
    str2 = ' 8\t6\t852\t853\n886 \t 027 '
    print(hq_zw_zm_sz(str2))
    str2 = ' '
    print(hq_zw_zm_sz(str2))

    # `encoding` is not a read_excel parameter in current pandas (passing it
    # raises TypeError), so it is omitted here.
    cs_datas = pd.read_excel('./datas.xlsx', dtype=str, index_col=None)
    # Work on an independent copy of the first three rows so the column
    # assignments below do not write into a slice of `cs_datas`.
    cs_datas_sected = cs_datas.head(3).copy()
    columns = cs_datas_sected.columns
    # Clean every cell, one column at a time.
    for col in columns:
        cs_datas_sected[col] = cs_datas_sected[col].apply(hq_zw_zm_sz)
import re
import numpy as np
import pandas as pd
"""
只要字符串中的中文,数字
"""
def hq_zw_zm_sz(ss):
    """Keep only the Chinese characters and ASCII digits of a string.

    Every run of characters in the range U+4E00-U+9FA5 or ``0-9`` is
    extracted and the runs are concatenated in their original order.
    Everything else (whitespace, punctuation, ASCII letters, ...) is dropped.

    Args:
        ss: The value to clean. Anything that is not a ``str`` (``None``,
            a NaN of any kind, numbers, ...) is treated as "no data".

    Returns:
        The concatenated Chinese characters and digits, or ``None`` when
        the input is not a string or contains no such characters.
    """
    # An identity test against np.nan misses other NaN objects
    # (float('nan'), pd.NA, ...), which would then make re.findall raise
    # TypeError; a type check covers every non-string case at once.
    if not isinstance(ss, str):
        return None
    kept = re.findall(r'[\u4e00-\u9fa50-9]+', ss)
    return "".join(kept) if kept else None
if __name__ == '__main__':
    # Demo inputs: mixed Chinese text, digits separated by whitespace,
    # and a whitespace-only string. Each cleaned result is printed in turn.
    demo_inputs = (
        ' 武\n汉加\t油,中\th国\n加\t油!',
        ' 8\t6\t852\t853\n886 \t 027 ',
        ' ',
    )
    for sample in demo_inputs:
        print(hq_zw_zm_sz(sample))
"""
遍历pandas行数据并进行格式化处理
"""
def format_rows(df, formatter=None):
    """Build a new DataFrame with every cell of ``df`` passed through a cleaner.

    Args:
        df: The DataFrame whose cells are to be cleaned. It is not modified.
        formatter: Callable applied to each cell value. Defaults to
            ``hq_zw_zm_sz`` when omitted.

    Returns:
        A new DataFrame with the same column labels as ``df`` and one row
        per input row. The result gets a fresh default index; the index of
        ``df`` is not carried over.
    """
    if formatter is None:
        formatter = hq_zw_zm_sz
    columns = df.columns
    # Each row is visited exactly once and its cells are cleaned in a single
    # pass. itertuples(name=None) yields plain tuples in column order, which
    # avoids building a Series per row as iterrows does.
    cleaned_rows = [
        [formatter(value) for value in row]
        for row in df.itertuples(index=False, name=None)
    ]
    return pd.DataFrame(cleaned_rows, columns=columns)
# Load the sheet with every cell as a string. `encoding` is not a read_excel
# parameter in current pandas (passing it raises TypeError), so it is omitted.
cs_datas = pd.read_excel('./datas_cs.xlsx', dtype=str, index_col=None)
# Only the first three rows are used as a small sample.
cs_datas_sected = cs_datas.head(3)
dffs = format_rows(cs_datas_sected)
# Bare expression: shows the result when run as the last line of a notebook cell.
dffs.head(3)
末尾小结
- pandas.DataFrame批量行处理记录,以便后续使用
- 心情不好时,听听音乐、撸撸代码,自愈疗伤