# -*- coding:utf-8 -*-
"""Score every stored 7-candlestick shape by its distance to a reference shape.

The script loads the whole ``shape_mode_k7f3`` table, selects the row for one
security (``code``) on one date (``date``) as the reference, computes a
weighted Euclidean distance between that row and every row of the table for
each of the seven candlesticks, blends the seven distances into ``simkcoef``,
and appends ``mkdate`` / ``secode`` / ``simkcoef`` to the ``similar`` table.
"""
import pandas as pd
import pymysql
from sqlalchemy import create_engine
from math import sqrt

# The charset parameter keeps Chinese text from being garbled.
# NOTE(review): credentials are hard-coded here and in the engine URL below;
# consider loading them from configuration or the environment.
dbconn = pymysql.connect(
    host="127.0.0.1",
    database="fight",
    user="root",
    password="111111",
    port=3306,
    charset='utf8')
# NOTE(review): this URL selects the MySQLdb driver while the raw connection
# above uses pymysql -- confirm MySQLdb is installed where this runs.
conn = create_engine('mysql+mysqldb://root:111111@localhost:3306/fight?charset=utf8')

# SQL statement: load the complete shape table.
sqlcmd = "select * from shape_mode_k7f3"
try:
    data = pd.read_sql(sqlcmd, dbconn)
finally:
    # The raw connection is only needed for the read above.
    dbconn.close()

# Select the reference row by security code and market date.
# NOTE(review): assumes the 'mkdate' column compares equal to a
# 'YYYY-MM-DD' string -- confirm against the table schema.
date = '2018-03-29'
code = 'SZ000975'
x = data.loc[(data['secode'] == code) & (data['mkdate'] == date)]
if x.empty:
    # Fail early with context instead of a bare "list index out of range".
    raise IndexError(
        "no row in shape_mode_k7f3 for secode={0!r}, mkdate={1!r}".format(code, date))


def calc_sqrt(num):
    """Return the square root of a single number.

    ``math.sqrt`` cannot take a whole Series, so it is applied per element.
    """
    return sqrt(num)


# Exponent used for the squared differences of the Euclidean distance.
t = 2
# Weights of the body, upper-shadow and lower-shadow distances within one
# candlestick.
w = 0.5
ww = 0.3
www = 0.2

# Weight of each candlestick's distance in the overall coefficient; the
# coefficient is normalised by the sum of these weights (168).
KLINE_WEIGHTS = {1: 21, 2: 22, 3: 23, 4: 24, 5: 25, 6: 26, 7: 27}


def _pair_distance(frame, reference, first_col, second_col):
    """Return each row's Euclidean distance to ``reference`` over two columns.

    ``frame`` is the DataFrame holding the columns, ``reference`` is a
    DataFrame whose first row supplies the reference values.
    """
    first_ref = float(reference[first_col].iloc[0])
    second_ref = float(reference[second_col].iloc[0])
    squared = (pow(frame[first_col] - first_ref, t)
               + pow(frame[second_col] - second_ref, t))
    # Element-wise map avoids building a full row object per record.
    return squared.map(calc_sqrt)


for k in sorted(KLINE_WEIGHTS):
    body_col = 'juli_oc{0}'.format(k)
    upper_col = 'juli_hmax{0}'.format(k)
    lower_col = 'juli_lmin{0}'.format(k)

    # Body similarity (Fop / Fcp columns).
    data[body_col] = _pair_distance(
        data, x, '_r_Fop_{0}'.format(k), '_r_Fcp_{0}'.format(k))
    # Upper-shadow similarity (Fhp / maxco columns).
    data[upper_col] = _pair_distance(
        data, x, '_r_Fhp_{0}'.format(k), '_r_maxco_{0}'.format(k))
    # Lower-shadow similarity (Flp / minco columns).
    data[lower_col] = _pair_distance(
        data, x, '_r_Flp_{0}'.format(k), '_r_minco_{0}'.format(k))
    # Overall distance for this candlestick.
    data['juli_{0}'.format(k)] = (
        data[body_col] * w + data[upper_col] * ww + data[lower_col] * www)

# Overall candlestick-pattern distance: weighted mean of the seven distances.
# Terms are added from candlestick 7 down to 1, the same order as the original
# hand-written expression, so floating-point results are unchanged.
data['simkcoef'] = sum(
    data['juli_{0}'.format(k)] * KLINE_WEIGHTS[k]
    for k in sorted(KLINE_WEIGHTS, reverse=True)
) / sum(KLINE_WEIGHTS.values())

# ``.ix`` was removed from pandas; ``.loc`` performs the same label selection.
similar = data.loc[:, [u'mkdate', u'secode', u'simkcoef']]

# Persist the scores so they can be queried, sorted and analysed later.
similar.to_sql("similar", con=conn, if_exists='append')
完整的相似度代码
猜你喜欢
转载自blog.csdn.net/sphinxrascal168/article/details/80006697
今日推荐
周排行