1)导入三年A股EOD数据–>
2)对每只股票检验其平稳性(adfuller方法)–>
3)若不平稳,则检验其一阶差分的平稳性(略)–>
4)对股票进行暴力配对,计算每对股票组合的协整关系
1)导入三年A股EOD数据:由于事先已经把数据存为CSV文件,直接按年导入并append.
import pandas as pd
import numpy as np
# Load three years of A-share end-of-day (EOD) quotes, one CSV file per year.
data = pd.read_csv(r'C:\Users\yi\Desktop\Study\量化交易\stock2018.csv', sep=',', engine='python')
data2 = pd.read_csv(r'C:\Users\yi\Desktop\Study\量化交易\stock2017.csv', sep=',', engine='python')
data3 = pd.read_csv(r'C:\Users\yi\Desktop\Study\量化交易\stock2016.csv', sep=',', engine='python')
# Stack the yearly frames row-wise. DataFrame.append was removed in pandas 2.0;
# pd.concat is the supported equivalent and keeps each frame's original index.
data_test0 = pd.concat([data, data2])
data_test1 = pd.concat([data_test0, data3])
# Use the trade date as the index, drop every quote column except ts_code and
# close, and sort the rows by date in ascending order.
EOD = (
    data_test1.set_index(['trade_date'], drop=True)
    .drop(
        ['pre_close', 'change', 'pct_change', 'trade_date.1',
         'open', 'high', 'low', 'vol', 'amount'],
        axis=1,
    )
    .sort_index()
)
p_close = EOD['close']
# Group by a scalar key (not a one-element list) so that group labels are plain
# ts_code values rather than 1-tuples on recent pandas versions.
EODG = EOD.groupby('ts_code')
stock_close_list = pd.DataFrame()
# Build a wide table: one column per stock, trade_date as the index, close as
# the value.
# NOTE(review): assigning a Series to a DataFrame column aligns it to the
# frame's existing index, so the rows end up being the trade dates of the first
# stock inserted; this column-by-column form is kept on purpose to preserve
# that alignment.
for ts_code, quotes in EODG:
    stock_close_list[ts_code] = quotes['close']
stock_close_list.head()
# Drop every stock (column) that has at least one missing close price.
stock_close_list = stock_close_list.dropna(axis=1)
观察数据EOD:
ts_code close
trade_date
20160104 603999.SH 52.44
20160104 600775.SH 17.17
20160104 002067.SZ 8.04
20160104 600776.SH 10.22
20160104 002066.SZ 18.30
p_close:
trade_date
20160104 52.44
20160104 17.17
20160104 8.04
20160104 10.22
20160104 18.30
Name: close, dtype: float64
EODG:
ts_code close
trade_date
20160104 603999.SH 52.44
20160104 600775.SH 17.17
20160104 002067.SZ 8.04
20160104 600776.SH 10.22
20160104 002066.SZ 18.30
20160104 600777.SH 4.07
20160104 002064.SZ 5.82
20160104 600778.SH 11.12
stock_close_list:
000001.SZ 000005.SZ 000009.SZ 000010.SZ 000011.SZ 000012.SZ 000014.SZ 000026.SZ 000027.SZ 000036.SZ ... 603698.SH 603806.SH 603866.SH 603885.SH 603899.SH 603968.SH 603969.SH 603979.SH 603989.SH 603996.SH
trade_date
20160104 9.30 9.04 11.92 9.10 12.98 11.68 20.48 14.61 8.57 8.12 ... 31.77 44.20 41.88 30.45 18.87 26.54 17.73 22.84 31.39 21.42
20160105 9.36 8.90 11.36 8.59 12.44 11.35 19.26 13.99 8.55 8.34 ... 30.77 42.74 46.07 28.58 20.74 25.99 16.65 22.32 30.01 23.56
20160106 9.46 9.18 11.77 8.94 12.72 11.70 19.66 14.24 8.76 8.58 ... 31.41 43.71 50.68 29.56 21.32 26.54 17.04 23.16 30.99 25.91
20160107 8.98 8.27 10.59 8.05 11.44 10.54 17.69 12.81 7.93 7.83 ... 28.27 39.37 55.46 26.62 19.18 23.90 15.34 20.85 27.89 28.50
20160108 9.13 8.30 10.69 8.06 11.52 10.64 17.96 12.78 8.08 8.38 ... 28.54 39.19 61.01 27.62 20.72 24.10 15.52 21.21 28.56 31.35
20160111 8.83 7.47 9.62 7.25 10.37 9.58 16.16 11.99 7.40 8.11 ... 25.68 38.14 54.90 27.16 19.71 23.65 13.97 19.45 25.73 34.49
20160112 8.87 7.50 9.62 7.29 10.29 9.90 16.26 12.09 7.46 8.23 ... 25.91 38.92 49.41 28.17 17.76 23.97 13.94 19.87 26.06 31.04
20160113 8.79 7.11 9.33 6.92 9.69 9.33 15.22 11.95 7.24 7.42 ... 24.69 37.95 44.47 26.96 16.36 22.74 13.17 19.50 24.94 27.94
20160114 8.84 7.40 9.73 7.12 10.00 9.78 15.73 12.34 7.45 7.47 ... 25.76 38.92 43.44 27.54 16.66 23.60 13.82 20.26 25.93 26.12
20160115 8.59 7.14 9.05 7.10 9.77 8.95 14.60 11.78 7.25 7.07 ... 25.54 36.61 39.10 25.93 15.67 22.35 13.45 19.39 24.66 23.51
10 rows × 809 columns
2)对每只股票检验其平稳性(adfuller方法)
adf_test = pd.DataFrame()

# Stationarity check built on the augmented Dickey-Fuller test.
from statsmodels.tsa.stattools import adfuller


def testStationarity(data):
    """Run ``adfuller`` on ``data`` and return its results as a labelled Series.

    Args:
        data: The series of values passed straight to ``adfuller``.

    Returns:
        A ``pd.Series`` whose first four entries are the first four items of
        the ``adfuller`` result, labelled 'Test Statistic', 'p-value',
        'Lags Used' and 'Number of Observations Used', followed by one
        'Critical Value (<level>)' entry for each item of the mapping found at
        position 4 of the result.
    """
    outcome = adfuller(data)
    labels = ['Test Statistic', 'p-value', 'Lags Used', 'Number of Observations Used']
    summary = dict(zip(labels, outcome[0:4]))
    # Position 4 of the result maps each level name to its critical value.
    summary.update(
        {'Critical Value (%s)' % level: threshold for level, threshold in outcome[4].items()}
    )
    return pd.Series(summary)
# Run the stationarity test on every stock's close-price column and collect the
# results into adf_test: one column per stock code, one row per statistic.
# The table is built in a single pass instead of concatenating inside the loop,
# which re-copied the growing frame (and printed it) on every iteration.
adf_test = pd.DataFrame(
    {code: testStationarity(stock_close_list[code]) for code in stock_close_list.columns}
)
# Transpose so each stock is a row, then show the ten smallest p-values.
adf_test.T.sort_values(by=['p-value'], ascending=True).head(10)
Test Statistic p-value Lags Used Number of Observations Used Critical Value (1%) Critical Value (5%) Critical Value (10%)
002690.SZ -5.779028 5.177776e-07 1.0 700.0 -3.439726 -2.865678 -2.568973
002215.SZ -4.235062 5.744170e-04 0.0 701.0 -3.439713 -2.865672 -2.568970
000767.SZ -4.204164 6.482872e-04 1.0 700.0 -3.439726 -2.865678 -2.568973
603568.SH -4.170916 7.377087e-04 1.0 700.0 -3.439726 -2.865678 -2.568973
601566.SH -3.904124 2.004703e-03 0.0 701.0 -3.439713 -2.865672 -2.568970
002644.SZ -3.571075 6.338700e-03 1.0 700.0 -3.439726 -2.865678 -2.568973
300357.SZ -3.551685 6.754800e-03 0.0 701.0 -3.439713 -2.865672 -2.568970
002401.SZ -3.511313 7.701260e-03 0.0 701.0 -3.439713 -2.865672 -2.568970
002268.SZ -3.415361 1.044624e-02 0.0 701.0 -3.439713 -2.865672 -2.568970
600754.SH -3.398580 1.100738e-02 1.0 700.0 -3.439726 -2.865678 -2.568973
从p-value的值观察,三千多只股票中,只有4只的p-value小于0.001(上表前4行),可以非常有把握地认为其EOD时间序列是平稳序列;若按常用的5%显著性水平,上表前10只股票均可拒绝单位根假设,但相对于全部股票仍是极少数. 以最极端的002690.SZ为例
#根据结果可得,股票价格基本都是随机游走,只有极个别除外,比如002690.SZ;
#stock_close_list['002690.SZ'].plot(figsize=(14,7))
#stock_close_list['002690.SZ'].values
import matplotlib.pyplot as plt
# Plot the close-price history of 002690.SZ against the table's trade_date index.
fig = plt.figure(dpi=128, figsize=(5, 3))
plt.plot(
    stock_close_list.index,
    stock_close_list['002690.SZ'],
    linewidth=1,
    label='002690.SZ',
)
plt.xlabel('date', fontsize=12)
plt.ylabel('EOD price', fontsize=12)
plt.title('002690.SZ EOD')
plt.tick_params(axis='both', which='major', labelsize=12)
# The label is attached to the line above; passing a bare string to
# plt.legend() would be iterated character by character and label the line "0".
plt.legend()
3)若不平稳,则检验其一阶差分的平稳性(略)
个人认为,由于A股有涨跌停,所以长期来看,股票走势的一阶差分基本都在涨跌停的箱体之内波动,所以A股股票的一阶差分基本都是平稳序列,本步骤忽略.
4)对股票进行暴力配对,计算每对股票组合的协整关系
#下面暴力寻找成对股票的协整关系
from statsmodels.tsa.stattools import coint
# Brute-force scan: run coint() on every ordered pair of distinct stocks and
# keep element [1] of its result (the p-value, per the statsmodels docs),
# keyed as "<stock1>/<stock2>".
# Both orders of each pair are evaluated, as in the original loop, because the
# two series play different roles in the test.
# Results are gathered in a dict and turned into a Series once: Series.append
# was removed in pandas 2.0 and re-copied the whole Series on every call.
coint_pvalues = {
    str(stock1) + '/' + str(stock2): coint(
        stock_close_list[stock1], stock_close_list[stock2]
    )[1]
    for stock1 in stock_close_list.columns
    for stock2 in stock_close_list.columns
    if stock1 != stock2
}
# Explicit dtype keeps an empty result well-typed and avoids the
# default-dtype warning that a bare pd.Series() raises on some pandas versions.
coint_list = pd.Series(coint_pvalues, dtype='float64')
经过5,6个小时的计算,得到coint_list结果,将结果写入CSV文件备用.
扫描二维码关注公众号,回复:
4753166 查看本文章
# Persist the pairwise cointegration results to CSV so the hours-long scan does
# not have to be repeated.
coint_csv_path = r'C:\Users\yi\Desktop\Study\量化交易\coint_list.csv'
coint_list.to_csv(coint_csv_path)
下一章将对强协整关系的股票组合进行深入分析