数据读取:
# Load sheet 'Sheet1' of the bus workbook into a DataFrame.
info_keliu = pd.read_excel(
    r'D:\project资料\公交.xlsx',
    sheet_name='Sheet1',
)

# Data filtering:
#   1. drop the rows whose '居住地' column equals '未知';
#   2. cast every column to str (the .str accessor below then sees strings);
#   3. keep only the rows whose '工作地' column contains '禅城'.
residence_known = info_keliu['居住地'] != '未知'
info_kl = info_keliu[residence_known]
info = info_kl.astype(str)
workplace_matches = info['工作地'].str.contains('禅城')
info_cc = info[workplace_matches]
数据去除null:
df.dropna(axis=0, how='any')  # 只要有一个值为null,就删除该行(axis=1 时删除该列)
df.dropna(axis=0, how='all')  # 整行(axis=1 时为整列)都是null才删除
indexs = list(df[np.isnan(df['aaa'])].index)  # 等价写法: df[np.isnan(df['aaa'])].index.tolist()
df = df.drop(indexs)  # 删除nan行
获取不为nan的行
# Keep only the rows whose 'aaa' value is not missing.
# Series.notna() builds the boolean mask directly, instead of comparing the
# result of np.isnan() against the literal False, and it also works on
# object-dtype columns, where np.isnan() raises TypeError.
df = df[df['aaa'].notna()]
axis解释:
# Example 2x2 matrix:
#     2, 1
#     3, 4
# np.array() takes ONE nested sequence. Passing two lists as separate
# positional arguments makes NumPy treat the second one as the dtype
# argument, which fails.
aaa = np.array([[2, 1], [3, 4]])

# axis=0: move along the row index, collapsing the rows -> one value per column.
col_mean = np.mean(aaa, axis=0)
# col_mean -> array([2.5, 2.5]);  (2+3)/2 = 2.5, (1+4)/2 = 2.5

# axis=1: move along the column index, collapsing the columns -> one value per row.
row_mean = np.mean(aaa, axis=1)
# row_mean -> array([1.5, 3.5]);  (2+1)/2 = 1.5, (3+4)/2 = 3.5
数据去重:
# De-duplicate list_gzd while keeping the order of first occurrence.
# dict keys are unique and keep insertion order (Python 3.7+), so this
# replaces the manual loop, whose `not in` test on a list rescans the
# result for every element.
# NOTE: the items must be hashable (e.g. str, numbers, tuples).
list_gzd_new = list(dict.fromkeys(list_gzd))
DataFrame 的列转为列表:先取出该列(Series),再转为 ndarray 后调用 tolist()
# Take the 'destination' entry of from_jzd, view it as a NumPy array and
# convert that array to a plain Python list.
destination_values = np.asarray(from_jzd['destination'])
destination = destination_values.tolist()
dict:
# Build the JSON payload that is fed to the multi-route program for the
# AMap (Gaode) API; that program is an existing one found online.
all_route = []
# NOTE: `route` must be created INSIDE the loop. A single dict created
# outside the loop would be appended repeatedly, so every element of
# all_route would be the same object holding only the last iteration's values.
for i, first_value in enumerate(flat):
    # NOTE(review): each key receives its value from the oppositely named
    # sequence (flat -> 'flng', flng -> 'flat', glat -> 'tlng',
    # glng -> 'tlat'). Kept exactly as in the original; confirm whether the
    # input sequences are mislabelled or the keys are swapped.
    route = {
        'flng': first_value,
        'flat': flng[i],
        'tlng': glat[i],
        'tlat': glng[i],
    }
    all_route.append(route)
    print(route)
# Serialise the list of route dicts to a JSON string.
route_fs = json.dumps(all_route)
################################################# 字符串 ##########################################
# Strings: a slice with step -1 walks the string backwards, i.e. reverses it.
a = 'abc'
b = a[::-1]
# b == 'cba'
################################################### json ###############################################
# JSON reading and writing.
#
# json.dumps: serialise an object to a JSON string.
#   sort_keys:    order the output by key
#   indent:       indent nested levels by 4 spaces for readable output
#   ensure_ascii: False leaves non-ASCII characters (e.g. Chinese) unescaped
dumps_options = {'sort_keys': True, 'indent': 4, 'ensure_ascii': False}
s_dumps = json.dumps(data_obj, **dumps_options)
print(s_dumps)
# --------------------------------------------------- separator ------------------------------------------------------------
# json.dump: serialise an object straight into a file.
#   - the first argument is the object to serialise, the second an open
#     file handle;
#   - open the file with an explicit UTF-8 encoding;
#   - after this runs, data.json exists in the current working directory;
#     open it to see what JSON text looks like.
with open("data.json", "w", encoding="UTF-8") as f_dump:
    # json.dump writes to the handle and returns None, so there is no
    # result worth binding or printing.
    json.dump(data_obj, f_dump, ensure_ascii=False)

# json.load: parse JSON from an open file handle.
# Mind the encoding used to open the file.
with open("data.json", "r", encoding="UTF-8") as f_load:
    r_load = json.load(f_load)
print(r_load)
# ---------------------------------------------------json------------------------------------------------------------
# json.loads: parse a JSON document held in a string (here the text
# produced by json.dumps) back into Python objects.
r_loads = json.loads(s_dumps)
print(r_loads)
# Not used below; presumably a sample of the JSON text to type at the prompt.
arg = '{"bakend": "www.oldboy.org", "record": {"server": "100.1.7.9", "weight": 20, "maxconn": 30}}'
# json.loads() no longer accepts an `encoding` keyword: it was removed in
# Python 3.9 and passing it raises TypeError. input() already returns str,
# so no encoding is needed.
a = json.loads(input('请输入添加的数据:'))
print(a)
############################################### 数据库mysql #################################################
# Connect to the MySQL database and load a query result.
import pymysql  # DBAPI driver named by the 'mysql+pymysql' URL scheme
import pandas as pd
from sqlalchemy import create_engine

# NOTE(review): the "[email protected]" part of this URL looks like e-mail
# obfuscation introduced when the page was scraped, not a real
# password@host pair. Restore the real value before use, ideally from
# configuration rather than from source code.
db = create_engine('mysql+pymysql://root:[email protected]:5029/Data_0104?charset=utf8')
sql = 'select * from gf_subway_5mm_1216 where station LIKE"珠江新城" and start_date_time BETWEEN "2017-12-16 08:00:00" AND "2017-12-16 08:30:00"'
# Engine.execute() was removed in SQLAlchemy 2.0. pd.read_sql runs the query
# through the engine and returns the rows as a DataFrame.
df = pd.read_sql(sql, db)
sql:
UNION:合并多个查询的结果集,并去除重复行;
UNION ALL:合并多个查询的结果集,保留重复行(不去重)。
########################################### 正则表达式 #####################################
a = 'ItemScore(1805100000001637,7.688509220116103), ItemScore(1805080000001600,7.684840663118415), ItemScore(1805110000001662,7.683625207896754)'
# Extract the "<id>,<score>" number pairs inside the parentheses.
# The dot is escaped so it matches only a literal decimal point, not any
# character.
pa = r'\d+,\d\.\d+'
# re.findall takes the pattern first and the string to search second.
matches = re.findall(pa, a)
########################################### 编码内容 ###############################################
# Unicode escapes: encode one[1] to UTF-8 bytes, then decode those bytes with
# the 'unicode_escape' codec, which turns literal backslash escapes
# (e.g. \uXXXX) into the characters they name.
# NOTE(review): assumes one[1] is a str holding ASCII-only escape text.
# 'unicode_escape' decodes non-escape bytes as Latin-1, so real non-ASCII
# characters already present would come out garbled -- confirm the input.
one[1].encode('utf-8').decode('unicode_escape')
数据处理常用语句
猜你喜欢
转载自blog.csdn.net/weixin_41512727/article/details/80810320
今日推荐
周排行