大宗交易数据挖掘(三)

计算维度

数据集字段如下:
1. [CurClose],当前收盘价;
2. [TradePrice],大宗交易成交价;
3. [FluctRange3],近3日波动幅度(中途发现波动幅度太大,止损点不好定,所以把最近3天波动幅度超过15%的都去掉了);
4. [Volume],大宗交易成交量
5. [Amount],大宗交易金额
6. [Discount],折扣率 = [TradePrice]/[CurClose] - 1
7. [BuyerCount],买家席位数;
8. [SellerCount],卖家席位数;
9. [TradeToSelf],是否存在同席位交易;
10. [Capital],流通股本;
11. [VolumeToCapital],交易量占流通股本比值 = [Volume]/[VolumeToCapital]
12. [LastDZJYDays],上一次大宗交易距今有多少个交易日,停盘时间不计;
13. [ClosedDays],累计停盘时长(交易日);
14. [ZSCloseToMA5],上证指数(大盘)当前收盘价相对于5日线的的涨幅 = 当前收盘价/MA5 - 1;
15. [ZSCloseToMA20],上证指数(大盘)当前收盘价相对于20日线的的涨幅 = 当前收盘价/MA20 - 1;
16. [CloseToMA5],股票当前收盘价相对于其5日线的的涨幅 = 当前收盘价/MA5 - 1;
17. [CloseToMA20],股票当前收盘价相对于其20日线的的涨幅 = 当前收盘价/MA20 - 1;
18. [BuyRK],股价低于大宗交易价时,所处的天数。例如大宗交易发生在2018-1-1,当前是2018-1-3,则该字段为2,表示大宗交易这个事件发生后的第2天,这种情况视为有买入的机会;
19. [SellRK],如果买入后,股价相对于买入价(或大宗交易价)上涨了3%,则记录当天所处的天数为SellRK。

主要代码如下:

if exists(select * from sysobjects where id=object_id('DZJY_TradeByDay') and OBJECTPROPERTY(id, 'IsTable')=1) 
drop table DZJY_TradeByDay
go
if object_id('tempdb..#BaseTradeInfo') is not null drop table tempdb..#BaseTradeInfo
go
if object_id('tempdb..#bsrk') is not null drop table tempdb..#bsrk 
go
if exists(select * from sysobjects where id=object_id('MultiDimDataSet') and OBJECTPROPERTY(id, 'IsTable')=1) 
drop table MultiDimDataSet
go

--基础交易信息,每天只有一条大宗交易数据
select distinct TradeDate, StockCode into DZJY_TradeByDay from DZJY

--TradeToSelf用来记录是否存在同席位交易,有则为1,无则为0
select (StockCode+convert(varchar(10),TradeDate, 112))as TradeKey,d.TradeDate, d.StockCode, 
        (select PriceClose from TradeByDay where TradeDate=d.TradeDate and StockCode = d.StockCode) as CurClose,
        round(sum(d.TradePrice * d.Volume)/sum(d.Volume),2) as TradePrice, 
        sum(d.Volume) as Volume, 
        round(sum(d.TradePrice * d.Volume),2) as Amount,
        count(distinct d.Buyer) as BuyerCount,
        count(distinct d.Seller) as SellerCount,
        max(TradeToSelf) as TradeToSelf
into #BaseTradeInfo
from (
    select StockCode, TradeDate, TradePrice, Volume, Buyer, Seller,
        TradeToSelf = case when Buyer = Seller then 1 else 0 end
    from DZJY

)d
group by d.TradeDate, d.StockCode

--买卖天数
select b1.TradeKey, b1.StockCode, b1.TradePrice, b1.CurClose, b1.FluctRange3, b1.RK, b1.BuyRK,
        --未来5天内最高价大于交易价的1.03倍,确定卖出的天数
        SellRK = case when b1.BuyRK is NULL then NULL
                      else (select TOP 1 RK from StockMA 
                            where StockCode = b1.StockCode and 
                                    RK between b1.BuyRK+1 and b1.BuyRK+5 and 
                                    PriceMax>(1.03*b1.TradePrice) order by TradeKey)
                      end,
        round(b1.CurClose/s1.MA20-1,4) as CloseToMA20,
        round(b1.CurClose/s1.MA5-1,4) as CloseToMA5
into #bsrk
from (
    select b.TradeKey, b.StockCode, b.TradePrice, b.CurClose, s.FluctRange3, s.RK,
            --未来5天内最低价小于大宗交易价,确定买入的天数
            (select TOP 1 RK from StockMA 
             where StockCode = b.StockCode and 
                    RK between s.RK+1 and s.RK+5 and 
                    PriceMin<b.TradePrice order by TradeKey) as BuyRK
    from #BaseTradeInfo b left outer join StockMA s on b.TradeKey = s.TradeKey
)b1 left outer join StockMA s1 on b1.TradeKey = s1.TradeKey

--抽取维度
select t.TradeKey, t.TradeDate, t.StockCode, t.CurClose, t.TradePrice, rk.FluctRange3, t.Volume, t.Amount, 
        round(t.TradePrice/t.CurClose-1,3) as Discount, t.BuyerCount, t.SellerCount,t.TradeToSelf,
        s.Capital, round(t.Volume/s.Capital,4) as VolumeToCapital,
        df.LastDZJYDays, (df.LastTradeDays-df.LastDZJYDays) as ClosedDays,
        round(zs.CurSHClose/zs.SHMA20-1,4) as ZSCloseToMA20, 
        round(zs.CurSHClose/zs.SHMA5-1,4) as ZSCloseToMA5,
        rk.RK, isnull(rk.BuyRK,0) as BuyRK, isnull(rk.SellRK,0) as SellRK, rk.CloseToMA20, rk.CloseToMA5
into MultiDimDataSet
from #BaseTradeInfo as t left outer join Stockinfo s on t.StockCode = s.StockCode
    left outer join DZJY_TradeFreq df on t.TradeKey = df.TradeKey
    left outer join SHZSMA zs on t.TradeDate = zs.TradeDate
    left outer join #bsrk rk on t.TradeKey = rk.TradeKey

查询的优化

计算买卖天数时,对于上百万行的数据,检索速度实在比较拙计。因此通过了一些手段来提高查询速度,譬如索引。这里展示部分表在计算时加的索引:

IF EXISTS(SELECT * FROM sysobjects WHERE id = object_id('gupiao.dbo.StockRanK') and OBJECTPROPERTY(id, 'IsTable')=1)
DROP TABLE gupiao.dbo.StockRanK
GO
IF EXISTS(SELECT * FROM sysobjects WHERE id = object_id('gupiao.dbo.StockMA') and OBJECTPROPERTY(id, 'IsTable')=1)
DROP TABLE gupiao.dbo.StockMA
GO

SELECT (StockCode+CONVERT(VARCHAR(8),TradeDate,112)) AS TradeKey, 
        TradeDate, StockCode, PriceOpen, PriceClose, PriceMin, PriceMax, Volume, Amount,
        RANK() OVER(PARTITION BY StockCode ORDER BY TradeDate) AS RK
INTO gupiao.dbo.StockRanK
FROM TradeByDay
GO


--增加索引
CREATE CLUSTERED INDEX [ClusteredIndex-1] ON gupiao.dbo.StockRanK
(
    [StockCode] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO

CREATE NONCLUSTERED INDEX [NonClusteredIndex-1] ON gupiao.dbo.StockRanK
(
    [TradeDate] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO

--FluctRange3最近3个交易日的波动幅度
SELECT s.TradeKey, s.StockCode, s.TradeDate,PriceOpen, PriceClose, PriceMin, PriceMax, RK,
    MA5 = ROUND((SELECT CASE WHEN SUM(Volume)=0 THEN NULL ELSE SUM(Amount)/SUM(Volume) END
                    FROM StockRanK WHERE StockCode = s.StockCode AND (RK BETWEEN (s.RK-4) AND s.RK)),3),
    MA20 = ROUND((SELECT CASE WHEN SUM(Volume)=0 THEN NULL ELSE SUM(Amount)/SUM(Volume) END
                    FROM StockRanK WHERE StockCode = s.StockCode AND (RK BETWEEN (s.RK-19) AND s.RK)),3),
    FluctRange3 = ROUND((SELECT CASE WHEN Min(PriceMin)=0 THEN NULL ELSE Max(PriceMax)/Min(PriceMin)-1 END
                    FROM StockRanK WHERE StockCode = s.StockCode AND (RK BETWEEN (s.RK-2) AND s.RK)),4)
INTO StockMA
FROM StockRanK AS s

CREATE CLUSTERED INDEX [ClusteredIndex-1] ON gupiao.dbo.StockMA
(
    [StockCode] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO

CREATE NONCLUSTERED INDEX [NonClusteredIndex-1] ON gupiao.dbo.StockMA
(
    [TradeDate] ASC
)WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, ONLINE = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON) ON [PRIMARY]
GO

猜你喜欢

转载自blog.csdn.net/godzla/article/details/79431834