stats.py里面封装了大量的计算绩效和风险的指标,但是估计这个写的时间比较早,用了大量的numpy,而对pandas使用较少,如果使用pandas的高级功能来封装这些函数的话,总体的代码量可能大大较少
##########################################################################
#从future模块引入division,在py2中进行真除法
from __future__
import division
#pandas模块
import pandas
as pd
#numpy 模块
import numpy
as np
#scipy 模块
from scipy
import stats
#six模块,py2\py3兼容包
from six
import iteritems
#从utils引入平均值、标准差、最小、等函数
from .utils
import nanmean, nanstd, nanmin, up, down, roll, rolling_window
#从周期中引入年化因子、每年近似日期数目
from .periods
import ANNUALIZATION_FACTORS, APPROX_BDAYS_PER_YEAR
#从周期中引入日、周、约、年变量
from .periods
import DAILY, WEEKLY, MONTHLY, YEARLY
def
_create_unary_vectorized_roll_function(
function):
def
unary_vectorized_roll(
arr,
window,
out=
None, **
kwargs):
"""
Computes the {human_readable} measure over a rolling window.
Parameters
----------
arr : array-like
The array to compute the rolling {human_readable} over.
window : int
Size of the rolling window in terms of the periodicity of the data.
out : array-like, optional
Array to use as output buffer.
If not passed, a new array will be created.
**kwargs
Forwarded to :func:`~empyrical.{name}`.
Returns
-------
rolling_{name} : array-like
The rolling {human_readable}.
"""
allocated_output = out
is
None
if
len(arr):
out = function(
rolling_window(arr,
min(
len(arr), window)).T,
out=out,
**kwargs
)
else:
out = np.empty(
0,
dtype=
'float64')
if allocated_output
and
isinstance(arr, pd.Series):
out = pd.Series(out,
index=arr.index[-
len(out):])
return out
unary_vectorized_roll.
__doc__ = unary_vectorized_roll.
__doc__.format(
name=function.
__name__,
human_readable=function.
__name__.replace(
'_',
' '),
)
return unary_vectorized_roll
def
_create_binary_vectorized_roll_function(
function):
def
binary_vectorized_roll(
lhs,
rhs,
window,
out=
None, **
kwargs):
"""
Computes the {human_readable} measure over a rolling window.
Parameters
----------
lhs : array-like
The first array to pass to the rolling {human_readable}.
rhs : array-like
The second array to pass to the rolling {human_readable}.
window : int
Size of the rolling window in terms of the periodicity of the data.
out : array-like, optional
Array to use as output buffer.
If not passed, a new array will be created.
**kwargs
Forwarded to :func:`~empyrical.{name}`.
Returns
-------
rolling_{name} : array-like
The rolling {human_readable}.
"""
allocated_output = out
is
None
if window >=
1
and
len(lhs)
and
len(rhs):
out = function(
rolling_window(lhs,
min(
len(lhs), window)).T,
rolling_window(rhs,
min(
len(rhs), window)).T,
out=out,
**kwargs
)
elif allocated_output:
out = np.empty(
0,
dtype=
'float64')
else:
out[()] = np.nan
if allocated_output:
if out.ndim ==
1
and
isinstance(lhs, pd.Series):
out = pd.Series(out,
index=lhs.index[-
len(out):])
elif out.ndim ==
2
and
isinstance(lhs, pd.Series):
out = pd.DataFrame(out,
index=lhs.index[-
len(out):])
return out
binary_vectorized_roll.
__doc__ = binary_vectorized_roll.
__doc__.format(
name=function.
__name__,
human_readable=function.
__name__.replace(
'_',
' '),
)
return binary_vectorized_roll
def
_adjust_returns(
returns,
adjustment_factor):
"""
Returns the returns series adjusted by adjustment_factor. Optimizes for the
case of adjustment_factor being 0 by returning returns itself, not a copy!
Parameters
----------
returns : pd.Series or np.ndarray
adjustment_factor : pd.Series or np.ndarray or float or int
Returns
-------
adjusted_returns : array-like
"""
#判断这个是不是多余的,直接返回returns - adjustment_factor不就可以了吗?
#是为了效率?
if
isinstance(adjustment_factor, (
float,
int))
and adjustment_factor ==
0:
return returns
return returns - adjustment_factor
def
annualization_factor(
period,
annualization):
#可以自定义annualization,如果没有自定义,就从period. py里面选
"""
Return annualization factor from period entered or if a custom
value is passed in.
Parameters
----------
period : str, optional
Defines the periodicity of the 'returns' data for purposes of
annualizing. Value ignored if `annualization` parameter is specified.
Defaults are::
'monthly':12
'weekly': 52
'daily': 252
annualization : int, optional
Used to suppress default values available in `period` to convert
returns into annual returns. Value should be the annual frequency of
`returns`.
Returns
-------
annualization_factor : float
"""
if annualization
is
None:
try:
factor = ANNUALIZATION_FACTORS[period]
except
KeyError:
raise
ValueError(
"Period cannot be '{}'. "
"Can be '{}'.".format(
period,
"', '".join(ANNUALIZATION_FACTORS.keys())
)
)
else:
factor = annualization
return factor
def
cum_returns(
returns,
starting_value=
0,
out=
None):
"""
Compute cumulative returns from simple returns.
Parameters
----------
returns : pd.Series, np.ndarray, or pd.DataFrame
Returns of the strategy as a percentage, noncumulative.
- Time series with decimal returns.
- Example::
2015-07-16 -0.012143
2015-07-17 0.045350
2015-07-20 0.030957
2015-07-21 0.004902
- Also accepts two dimensional data. In this case, each column is
cumulated.
starting_value : float, optional
The starting returns.
out : array-like, optional
Array to use as output buffer.
If not passed, a new array will be created.
Returns
-------
cumulative_returns : array-like
Series of cumulative returns.
"""
if
len(returns) <
1:
return returns.copy()
nanmask = np.isnan(returns)
if np.any(nanmask):
returns = returns.copy()
returns[nanmask] =
0
allocated_output = out
is
None
if allocated_output:
out = np.empty_like(returns)
np.add(returns,
1,
out=out)
out.cumprod(
axis=
0,
out=out)
if starting_value ==
0:
np.subtract(out,
1,
out=out)
else:
np.multiply(out, starting_value,
out=out)
if allocated_output:
if returns.ndim ==
1
and
isinstance(returns, pd.Series):
out = pd.Series(out,
index=returns.index)
elif
isinstance(returns, pd.DataFrame):
out = pd.DataFrame(
out,
index=returns.index,
columns=returns.columns,
)
return out
def
cum_returns_final(
returns,
starting_value=
0):
"""
Compute total returns from simple returns.
Parameters
----------
returns : pd.Series or np.ndarray
Returns of the strategy as a percentage, noncumulative.
- Time series with decimal returns.
- Example:
2015-07-16 -0.012143
2015-07-17 0.045350
2015-07-20 0.030957
2015-07-21 0.004902.
starting_value : float, optional
The starting returns.
Returns
-------
total_returns : float
"""
if
len(returns) ==
0:
return np.nan
returns = np.nanprod(returns +
1)
if starting_value ==
0:
returns -=
1
else:
returns *= starting_value
return returns
def
aggregate_returns(
returns,
convert_to):
#把收益转换成周、月、年
"""
Aggregates returns by week, month, or year.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
convert_to : str
Can be 'weekly', 'monthly', or 'yearly'.
Returns
-------
aggregated_returns : pd.Series
"""
def
cumulate_returns(
x):
return cum_returns(x).iloc[-
1]
if convert_to == WEEKLY:
grouping = [
lambda
x: x.year,
lambda
x: x.isocalendar()[
1]]
elif convert_to == MONTHLY:
grouping = [
lambda
x: x.year,
lambda
x: x.month]
elif convert_to == YEARLY:
grouping = [
lambda
x: x.year]
else:
raise
ValueError(
'convert_to must be {}, {} or {}'.format(WEEKLY, MONTHLY, YEARLY)
)
return returns.groupby(grouping).apply(cumulate_returns)
def
max_drawdown(
returns,
out=
None):
#计算最大回撤
"""
Determines the maximum drawdown of a strategy.
Parameters
----------
returns : pd.Series or np.ndarray
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
out : array-like, optional
Array to use as output buffer.
If not passed, a new array will be created.
Returns
-------
max_drawdown : float
Note
-----
See https://en.wikipedia.org/wiki/Drawdown_(economics) for more details.
"""
allocated_output = out
is
None
if allocated_output:
out = np.empty(returns.shape[
1:])
returns_1d = returns.ndim ==
1
if
len(returns) <
1:
out[()] = np.nan
if returns_1d:
out = out.item()
return out
returns_array = np.asanyarray(returns)
cumulative = np.empty(
(returns.shape[
0] +
1,) + returns.shape[
1:],
dtype=
'float64',
)
cumulative[
0] = start =
100
cum_returns(returns_array,
starting_value=start,
out=cumulative[
1:])
max_return = np.fmax.accumulate(cumulative,
axis=
0)
nanmin((cumulative - max_return) / max_return,
axis=
0,
out=out)
if returns_1d:
out = out.item()
elif allocated_output
and
isinstance(returns, pd.DataFrame):
out = pd.Series(out)
return out
roll_max_drawdown = _create_unary_vectorized_roll_function(max_drawdown)
def
annual_return(
returns,
period=DAILY,
annualization=
None):
#计算复合年化收益
"""
Determines the mean annual growth rate of returns. This is equivilent
to the compound annual growth rate.
Parameters
----------
returns : pd.Series or np.ndarray
Periodic returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
period : str, optional
Defines the periodicity of the 'returns' data for purposes of
annualizing. Value ignored if `annualization` parameter is specified.
Defaults are::
'monthly':12
'weekly': 52
'daily': 252
annualization : int, optional
Used to suppress default values available in `period` to convert
returns into annual returns. Value should be the annual frequency of
`returns`.
Returns
-------
annual_return : float
Annual Return as CAGR (Compounded Annual Growth Rate).
"""
if
len(returns) <
1:
return np.nan
ann_factor = annualization_factor(period, annualization)
num_years =
len(returns) / ann_factor
# Pass array to ensure index -1 looks up successfully.
ending_value = cum_returns_final(returns,
starting_value=
1)
return ending_value ** (
1 / num_years) -
1
def
cagr(
returns,
period=DAILY,
annualization=
None):
"""
Compute compound annual growth rate. Alias function for
:func:`~empyrical.stats.annual_return`
Parameters
----------
returns : pd.Series or np.ndarray
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
period : str, optional
Defines the periodicity of the 'returns' data for purposes of
annualizing. Value ignored if `annualization` parameter is specified.
Defaults are::
'monthly':12
'weekly': 52
'daily': 252
annualization : int, optional
Used to suppress default values available in `period` to convert
returns into annual returns. Value should be the annual frequency of
`returns`.
- See full explanation in :func:`~empyrical.stats.annual_return`.
Returns
-------
cagr : float
The CAGR value.
"""
return annual_return(returns, period, annualization)
roll_cagr = _create_unary_vectorized_roll_function(cagr)
def
annual_volatility(
returns,
period=DAILY,
alpha=
2.0,
annualization=
None,
out=
None):
#计算收益率的标准差
"""
Determines the annual volatility of a strategy.
Parameters
----------
returns : pd.Series or np.ndarray
Periodic returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
period : str, optional
Defines the periodicity of the 'returns' data for purposes of
annualizing. Value ignored if `annualization` parameter is specified.
Defaults are::
'monthly':12
'weekly': 52
'daily': 252
alpha : float, optional
Scaling relation (Levy stability exponent).
annualization : int, optional
Used to suppress default values available in `period` to convert
returns into annual returns. Value should be the annual frequency of
`returns`.
out : array-like, optional
Array to use as output buffer.
If not passed, a new array will be created.
Returns
-------
annual_volatility : float
"""
allocated_output = out
is
None
if allocated_output:
out = np.empty(returns.shape[
1:])
returns_1d = returns.ndim ==
1
if
len(returns) <
2:
out[()] = np.nan
if returns_1d:
out = out.item()
return out
ann_factor = annualization_factor(period, annualization)
nanstd(returns,
ddof=
1,
axis=
0,
out=out)
out = np.multiply(out, ann_factor ** (
1.0 / alpha),
out=out)
if returns_1d:
out = out.item()
return out
roll_annual_volatility = _create_unary_vectorized_roll_function(
annual_volatility,
)
def
calmar_ratio(
returns,
period=DAILY,
annualization=
None):
#计算年化收益除以最大回撤的比率,mar
"""
Determines the Calmar ratio, or drawdown ratio, of a strategy.
Parameters
----------
returns : pd.Series or np.ndarray
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
period : str, optional
Defines the periodicity of the 'returns' data for purposes of
annualizing. Value ignored if `annualization` parameter is specified.
Defaults are::
'monthly':12
'weekly': 52
'daily': 252
annualization : int, optional
Used to suppress default values available in `period` to convert
returns into annual returns. Value should be the annual frequency of
`returns`.
Returns
-------
calmar_ratio : float
Calmar ratio (drawdown ratio) as float. Returns np.nan if there is no
calmar ratio.
Note
-----
See https://en.wikipedia.org/wiki/Calmar_ratio for more details.
"""
max_dd = max_drawdown(
returns=returns)
if max_dd <
0:
temp = annual_return(
returns=returns,
period=period,
annualization=annualization
) /
abs(max_dd)
else:
return np.nan
if np.isinf(temp):
return np.nan
return temp
def
omega_ratio(
returns,
risk_free=
0.0,
required_return=
0.0,
annualization=APPROX_BDAYS_PER_YEAR):
#计算omega_ratio,目标达成收益率的和除以收益率目标未达成的和
"""Determines the Omega ratio of a strategy.
Parameters
----------
returns : pd.Series or np.ndarray
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
risk_free : int, float
Constant risk-free return throughout the period
required_return : float, optional
Minimum acceptance return of the investor. Threshold over which to
consider positive vs negative returns. It will be converted to a
value appropriate for the period of the returns. E.g. An annual minimum
acceptable return of 100 will translate to a minimum acceptable
return of 0.018.
annualization : int, optional
Factor used to convert the required_return into a daily
value. Enter 1 if no time period conversion is necessary.
Returns
-------
omega_ratio : float
Note
-----
See https://en.wikipedia.org/wiki/Omega_ratio for more details.
"""
if
len(returns) <
2:
return np.nan
if annualization ==
1:
return_threshold = required_return
elif required_return <= -
1:
return np.nan
else:
return_threshold = (
1 + required_return) ** \
(
1. / annualization) -
1
returns_less_thresh = returns - risk_free - return_threshold
numer =
sum(returns_less_thresh[returns_less_thresh >
0.0])
denom = -
1.0 *
sum(returns_less_thresh[returns_less_thresh <
0.0])
if denom >
0.0:
return numer / denom
else:
return np.nan
def
sharpe_ratio(
returns,
risk_free=
0,
period=DAILY,
annualization=
None,
out=
None):
#计算夏普率
"""
Determines the Sharpe ratio of a strategy.
Parameters
----------
returns : pd.Series or np.ndarray
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
risk_free : int, float
Constant risk-free return throughout the period.
period : str, optional
Defines the periodicity of the 'returns' data for purposes of
annualizing. Value ignored if `annualization` parameter is specified.
Defaults are::
'monthly':12
'weekly': 52
'daily': 252
annualization : int, optional
Used to suppress default values available in `period` to convert
returns into annual returns. Value should be the annual frequency of
`returns`.
out : array-like, optional
Array to use as output buffer.
If not passed, a new array will be created.
Returns
-------
sharpe_ratio : float
nan if insufficient length of returns or if if adjusted returns are 0.
Note
-----
See https://en.wikipedia.org/wiki/Sharpe_ratio for more details.
"""
allocated_output = out
is
None
if allocated_output:
out = np.empty(returns.shape[
1:])
return_1d = returns.ndim ==
1
if
len(returns) <
2:
out[()] = np.nan
if return_1d:
out = out.item()
return out
returns_risk_adj = np.asanyarray(_adjust_returns(returns, risk_free))
ann_factor = annualization_factor(period, annualization)
np.multiply(
np.divide(
nanmean(returns_risk_adj,
axis=
0),
nanstd(returns_risk_adj,
ddof=
1,
axis=
0),
out=out,
),
np.sqrt(ann_factor),
out=out,
)
if return_1d:
out = out.item()
return out
roll_sharpe_ratio = _create_unary_vectorized_roll_function(sharpe_ratio)
def
sortino_ratio(
returns,
required_return=
0,
period=DAILY,
annualization=
None,
out=
None,
_downside_risk=
None):
#计算索提诺比率
"""
Determines the Sortino ratio of a strategy.
Parameters
----------
returns : pd.Series or np.ndarray or pd.DataFrame
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
required_return: float / series
minimum acceptable return
period : str, optional
Defines the periodicity of the 'returns' data for purposes of
annualizing. Value ignored if `annualization` parameter is specified.
Defaults are::
'monthly':12
'weekly': 52
'daily': 252
annualization : int, optional
Used to suppress default values available in `period` to convert
returns into annual returns. Value should be the annual frequency of
`returns`.
_downside_risk : float, optional
The downside risk of the given inputs, if known. Will be calculated if
not provided.
out : array-like, optional
Array to use as output buffer.
If not passed, a new array will be created.
Returns
-------
sortino_ratio : float or pd.Series
depends on input type
series ==> float
DataFrame ==> pd.Series
Note
-----
See `<https://www.sunrisecapital.com/wp-content/uploads/2014/06/Futures_
Mag_Sortino_0213.pdf>`__ for more details.
"""
allocated_output = out
is
None
if allocated_output:
out = np.empty(returns.shape[
1:])
return_1d = returns.ndim ==
1
if
len(returns) <
2:
out[()] = np.nan
if return_1d:
out = out.item()
return out
adj_returns = np.asanyarray(_adjust_returns(returns, required_return))
ann_factor = annualization_factor(period, annualization)
average_annual_return = nanmean(adj_returns,
axis=
0) * ann_factor
annualized_downside_risk = (
_downside_risk
if _downside_risk
is
not
None
else
downside_risk(returns, required_return, period, annualization)
)
np.divide(average_annual_return, annualized_downside_risk,
out=out)
if return_1d:
out = out.item()
elif
isinstance(returns, pd.DataFrame):
out = pd.Series(out)
return out
roll_sortino_ratio = _create_unary_vectorized_roll_function(sortino_ratio)
def
downside_risk(
returns,
required_return=
0,
period=DAILY,
annualization=
None,
out=
None):
#计算下行风险
"""
Determines the downside deviation below a threshold
Parameters
----------
returns : pd.Series or np.ndarray or pd.DataFrame
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
required_return: float / series
minimum acceptable return
period : str, optional
Defines the periodicity of the 'returns' data for purposes of
annualizing. Value ignored if `annualization` parameter is specified.
Defaults are::
'monthly':12
'weekly': 52
'daily': 252
annualization : int, optional
Used to suppress default values available in `period` to convert
returns into annual returns. Value should be the annual frequency of
`returns`.
out : array-like, optional
Array to use as output buffer.
If not passed, a new array will be created.
Returns
-------
downside_deviation : float or pd.Series
depends on input type
series ==> float
DataFrame ==> pd.Series
Note
-----
See `<https://www.sunrisecapital.com/wp-content/uploads/2014/06/Futures_
Mag_Sortino_0213.pdf>`__ for more details, specifically why using the
standard deviation of the negative returns is not correct.
"""
allocated_output = out
is
None
if allocated_output:
out = np.empty(returns.shape[
1:])
returns_1d = returns.ndim ==
1
if
len(returns) <
1:
out[()] = np.nan
if returns_1d:
out = out.item()
return out
ann_factor = annualization_factor(period, annualization)
downside_diff = np.clip(
_adjust_returns(
np.asanyarray(returns),
np.asanyarray(required_return),
),
np.NINF,
0,
)
np.square(downside_diff,
out=downside_diff)
nanmean(downside_diff,
axis=
0,
out=out)
np.sqrt(out,
out=out)
np.multiply(out, np.sqrt(ann_factor),
out=out)
if returns_1d:
out = out.item()
elif
isinstance(returns, pd.DataFrame):
out = pd.Series(out,
index=returns.columns)
return out
roll_downsize_risk = _create_unary_vectorized_roll_function(downside_risk)
def
excess_sharpe(
returns,
factor_returns,
out=
None):
#计算风险调整后的超额收益率
"""
Determines the Excess Sharpe of a strategy.
Parameters
----------
returns : pd.Series or np.ndarray
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
factor_returns: float / series
Benchmark return to compare returns against.
out : array-like, optional
Array to use as output buffer.
If not passed, a new array will be created.
Returns
-------
excess_sharpe : float
Note
-----
The excess Sharpe is a simplified Information Ratio that uses
tracking error rather than "active risk" as the denominator.
"""
allocated_output = out
is
None
if allocated_output:
out = np.empty(returns.shape[
1:])
returns_1d = returns.ndim ==
1
if
len(returns) <
2:
out[()] = np.nan
if returns_1d:
out = out.item()
return out
active_return = _adjust_returns(returns, factor_returns)
tracking_error = np.nan_to_num(nanstd(active_return,
ddof=
1,
axis=
0))
out = np.divide(
nanmean(active_return,
axis=
0,
out=out),
tracking_error,
out=out,
)
if returns_1d:
out = out.item()
return out
roll_excess_sharpe = _create_binary_vectorized_roll_function(excess_sharpe)
def
_to_pandas(
ob):
#转换成seriesh或者dataframe
"""Convert an array-like to a pandas object.
Parameters
----------
ob : array-like
The object to convert.
Returns
-------
pandas_structure : pd.Series or pd.DataFrame
The correct structure based on the dimensionality of the data.
"""
if
isinstance(ob, (pd.Series, pd.DataFrame)):
return ob
if ob.ndim ==
1:
return pd.Series(ob)
elif ob.ndim ==
2:
return pd.DataFrame(ob)
else:
raise
ValueError(
'cannot convert array of dim > 2 to a pandas structure',
)
def
_aligned_series(*
many_series):
"""
Return a new list of series containing the data in the input series, but
with their indices aligned. NaNs will be filled in for missing values.
Parameters
----------
*many_series
The series to align.
Returns
-------
aligned_series : iterable[array-like]
A new list of series containing the data in the input series, but
with their indices aligned. NaNs will be filled in for missing values.
"""
head = many_series[
0]
tail = many_series[
1:]
n =
len(head)
if (
isinstance(head, np.ndarray)
and
all(
len(s) == n
and
isinstance(s, np.ndarray)
for s
in tail)):
# optimization: ndarrays of the same length are already aligned
return many_series
# dataframe has no ``itervalues``
return (
v
for _, v
in iteritems(pd.concat(
map(_to_pandas, many_series),
axis=
1))
)
def
alpha_beta(
returns,
factor_returns,
risk_free=
0.0,
period=DAILY,
annualization=
None,
out=
None):
"""Calculates annualized alpha and beta.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
factor_returns : pd.Series
Daily noncumulative returns of the factor to which beta is
computed. Usually a benchmark such as the market.
- This is in the same style as returns.
risk_free : int, float, optional
Constant risk-free return throughout the period. For example, the
interest rate on a three month us treasury bill.
period : str, optional
Defines the periodicity of the 'returns' data for purposes of
annualizing. Value ignored if `annualization` parameter is specified.
Defaults are::
'monthly':12
'weekly': 52
'daily': 252
annualization : int, optional
Used to suppress default values available in `period` to convert
returns into annual returns. Value should be the annual frequency of
`returns`.
out : array-like, optional
Array to use as output buffer.
If not passed, a new array will be created.
Returns
-------
alpha : float
beta : float
"""
returns, factor_returns = _aligned_series(returns, factor_returns)
return alpha_beta_aligned(
returns,
factor_returns,
risk_free=risk_free,
period=period,
annualization=annualization,
out=out,
)
def
roll_alpha_beta(
returns,
factor_returns,
window=
10, **
kwargs):
"""
Computes alpha and beta over a rolling window.
Parameters
----------
lhs : array-like
The first array to pass to the rolling alpha-beta.
rhs : array-like
The second array to pass to the rolling alpha-beta.
window : int
Size of the rolling window in terms of the periodicity of the data.
out : array-like, optional
Array to use as output buffer.
If not passed, a new array will be created.
**kwargs
Forwarded to :func:`~empyrical.alpha_beta`.
"""
returns, factor_returns = _aligned_series(returns, factor_returns)
return roll_alpha_beta_aligned(
returns,
factor_returns,
window=window,
**kwargs
)
def
alpha_beta_aligned(
returns,
factor_returns,
risk_free=
0.0,
period=DAILY,
annualization=
None,
out=
None):
"""Calculates annualized alpha and beta.
If they are pd.Series, expects returns and factor_returns have already
been aligned on their labels. If np.ndarray, these arguments should have
the same shape.
Parameters
----------
returns : pd.Series or np.ndarray
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
factor_returns : pd.Series or np.ndarray
Daily noncumulative returns of the factor to which beta is
computed. Usually a benchmark such as the market.
- This is in the same style as returns.
risk_free : int, float, optional
Constant risk-free return throughout the period. For example, the
interest rate on a three month us treasury bill.
period : str, optional
Defines the periodicity of the 'returns' data for purposes of
annualizing. Value ignored if `annualization` parameter is specified.
Defaults are::
'monthly':12
'weekly': 52
'daily': 252
annualization : int, optional
Used to suppress default values available in `period` to convert
returns into annual returns. Value should be the annual frequency of
`returns`.
out : array-like, optional
Array to use as output buffer.
If not passed, a new array will be created.
Returns
-------
alpha : float
beta : float
"""
if out
is
None:
out = np.empty(returns.shape[
1:] + (
2,),
dtype=
'float64')
b = beta_aligned(returns, factor_returns, risk_free,
out=out[...,
1])
alpha_aligned(
returns,
factor_returns,
risk_free,
period,
annualization,
out=out[...,
0],
_beta=b,
)
return out
roll_alpha_beta_aligned = _create_binary_vectorized_roll_function(
alpha_beta_aligned,
)
def
alpha(
returns,
factor_returns,
risk_free=
0.0,
period=DAILY,
annualization=
None,
out=
None,
_beta=
None):
"""Calculates annualized alpha.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
factor_returns : pd.Series
Daily noncumulative returns of the factor to which beta is
computed. Usually a benchmark such as the market.
- This is in the same style as returns.
risk_free : int, float, optional
Constant risk-free return throughout the period. For example, the
interest rate on a three month us treasury bill.
period : str, optional
Defines the periodicity of the 'returns' data for purposes of
annualizing. Value ignored if `annualization` parameter is specified.
Defaults are::
'monthly':12
'weekly': 52
'daily': 252
annualization : int, optional
Used to suppress default values available in `period` to convert
returns into annual returns. Value should be the annual frequency of
`returns`.
- See full explanation in :func:`~empyrical.stats.annual_return`.
_beta : float, optional
The beta for the given inputs, if already known. Will be calculated
internally if not provided.
out : array-like, optional
Array to use as output buffer.
If not passed, a new array will be created.
Returns
-------
float
Alpha.
"""
if
not (
isinstance(returns, np.ndarray)
and
isinstance(factor_returns, np.ndarray)):
returns, factor_returns = _aligned_series(returns, factor_returns)
return alpha_aligned(
returns,
factor_returns,
risk_free=risk_free,
period=period,
annualization=annualization,
out=out,
_beta=_beta
)
roll_alpha = _create_binary_vectorized_roll_function(alpha)
def
alpha_aligned(
returns,
factor_returns,
risk_free=
0.0,
period=DAILY,
annualization=
None,
out=
None,
_beta=
None):
"""Calculates annualized alpha.
If they are pd.Series, expects returns and factor_returns have already
been aligned on their labels. If np.ndarray, these arguments should have
the same shape.
Parameters
----------
returns : pd.Series or np.ndarray
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
factor_returns : pd.Series or np.ndarray
Daily noncumulative returns of the factor to which beta is
computed. Usually a benchmark such as the market.
- This is in the same style as returns.
risk_free : int, float, optional
Constant risk-free return throughout the period. For example, the
interest rate on a three month us treasury bill.
period : str, optional
Defines the periodicity of the 'returns' data for purposes of
annualizing. Value ignored if `annualization` parameter is specified.
Defaults are::
'monthly':12
'weekly': 52
'daily': 252
annualization : int, optional
Used to suppress default values available in `period` to convert
returns into annual returns. Value should be the annual frequency of
`returns`.
- See full explanation in :func:`~empyrical.stats.annual_return`.
_beta : float, optional
The beta for the given inputs, if already known. Will be calculated
internally if not provided.
out : array-like, optional
Array to use as output buffer.
If not passed, a new array will be created.
Returns
-------
alpha : float
"""
allocated_output = out
is
None
if allocated_output:
out = np.empty(returns.shape[
1:],
dtype=
'float64')
if
len(returns) <
2:
out[()] = np.nan
if returns.ndim ==
1:
out = out.item()
return out
ann_factor = annualization_factor(period, annualization)
if _beta
is
None:
_beta = beta_aligned(returns, factor_returns, risk_free)
adj_returns = _adjust_returns(returns, risk_free)
adj_factor_returns = _adjust_returns(factor_returns, risk_free)
alpha_series = adj_returns - (_beta * adj_factor_returns)
out = np.multiply(
nanmean(alpha_series,
axis=
0,
out=out),
ann_factor,
out=out,
)
if allocated_output
and
isinstance(returns, pd.DataFrame):
out = pd.Series(out)
if returns.ndim ==
1:
out = out.item()
return out
roll_alpha_aligned = _create_binary_vectorized_roll_function(alpha_aligned)
def
beta(
returns,
factor_returns,
risk_free=
0.0,
out=
None):
"""Calculates beta.
Parameters
----------
returns : pd.Series
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
factor_returns : pd.Series
Daily noncumulative returns of the factor to which beta is
computed. Usually a benchmark such as the market.
- This is in the same style as returns.
risk_free : int, float, optional
Constant risk-free return throughout the period. For example, the
interest rate on a three month us treasury bill.
out : array-like, optional
Array to use as output buffer.
If not passed, a new array will be created.
Returns
-------
beta : float
"""
if
not (
isinstance(returns, np.ndarray)
and
isinstance(factor_returns, np.ndarray)):
returns, factor_returns = _aligned_series(returns, factor_returns)
return beta_aligned(
returns,
factor_returns,
risk_free=risk_free,
out=out,
)
roll_beta = _create_binary_vectorized_roll_function(beta)
def
beta_aligned(
returns,
factor_returns,
risk_free=
0.0,
out=
None):
"""Calculates beta.
If they are pd.Series, expects returns and factor_returns have already
been aligned on their labels. If np.ndarray, these arguments should have
the same shape.
Parameters
----------
returns : pd.Series or np.ndarray
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
factor_returns : pd.Series or np.ndarray
Daily noncumulative returns of the factor to which beta is
computed. Usually a benchmark such as the market.
- This is in the same style as returns.
risk_free : int, float, optional
Constant risk-free return throughout the period. For example, the
interest rate on a three month us treasury bill.
out : array-like, optional
Array to use as output buffer.
If not passed, a new array will be created.
Returns
-------
beta : float
Beta.
"""
# Cache these as locals since we're going to call them multiple times.
nan = np.nan
isnan = np.isnan
returns_1d = returns.ndim ==
1
if returns_1d:
returns = returns[:, np.newaxis]
if factor_returns.ndim ==
1:
factor_returns = factor_returns[:, np.newaxis]
N, M = returns.shape
if out
is
None:
out = np.full(M, nan)
elif out.ndim ==
0:
out = out[np.newaxis]
if
len(returns) <
1
or
len(factor_returns) <
2:
out[()] = nan
if returns_1d:
out = out.item()
return out
# Copy N times as a column vector and fill with nans to have the same
# missing value pattern as the dependent variable.
#
# PERF_TODO: We could probably avoid the space blowup by doing this in
# Cython.
# shape: (N, M)
independent = np.where(
isnan(returns),
nan,
factor_returns,
)
# Calculate beta as Cov(X, Y) / Cov(X, X).
# https://en.wikipedia.org/wiki/Simple_linear_regression#Fitting_the_regression_line # noqa
#
# NOTE: The usual formula for covariance is::
#
# mean((X - mean(X)) * (Y - mean(Y)))
#
# However, we don't actually need to take the mean of both sides of the
# product, because of the folllowing equivalence::
#
# Let X_res = (X - mean(X)).
# We have:
#
# mean(X_res * (Y - mean(Y))) = mean(X_res * (Y - mean(Y)))
# (1) = mean((X_res * Y) - (X_res * mean(Y)))
# (2) = mean(X_res * Y) - mean(X_res * mean(Y))
# (3) = mean(X_res * Y) - mean(X_res) * mean(Y)
# (4) = mean(X_res * Y) - 0 * mean(Y)
# (5) = mean(X_res * Y)
#
#
# The tricky step in the above derivation is step (4). We know that
# mean(X_res) is zero because, for any X:
#
# mean(X - mean(X)) = mean(X) - mean(X) = 0.
#
# The upshot of this is that we only have to center one of `independent`
# and `dependent` when calculating covariances. Since we need the centered
# `independent` to calculate its variance in the next step, we choose to
# center `independent`.
ind_residual = independent - nanmean(independent,
axis=
0)
covariances = nanmean(ind_residual * returns,
axis=
0)
# We end up with different variances in each column here because each
# column may have a different subset of the data dropped due to missing
# data in the corresponding dependent column.
# shape: (M,)
np.square(ind_residual,
out=ind_residual)
independent_variances = nanmean(ind_residual,
axis=
0)
independent_variances[independent_variances <
1.0e-30] = np.nan
np.divide(covariances, independent_variances,
out=out)
if returns_1d:
out = out.item()
return out
roll_beta_aligned = _create_binary_vectorized_roll_function(beta_aligned)
def
stability_of_timeseries(
returns):
"""Determines R-squared of a linear fit to the cumulative
log returns. Computes an ordinary least squares linear fit,
and returns R-squared.
Parameters
----------
returns : pd.Series or np.ndarray
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
Returns
-------
float
R-squared.
"""
if
len(returns) <
2:
return np.nan
returns = np.asanyarray(returns)
returns = returns[~np.isnan(returns)]
cum_log_returns = np.log1p(returns).cumsum()
rhat = stats.linregress(np.arange(
len(cum_log_returns)),
cum_log_returns)[
2]
return rhat **
2
def
tail_ratio(
returns):
"""Determines the ratio between the right (95%) and left tail (5%).
For example, a ratio of 0.25 means that losses are four times
as bad as profits.
Parameters
----------
returns : pd.Series or np.ndarray
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
Returns
-------
tail_ratio : float
"""
if
len(returns) <
1:
return np.nan
returns = np.asanyarray(returns)
# Be tolerant of nan's
returns = returns[~np.isnan(returns)]
if
len(returns) <
1:
return np.nan
return np.abs(np.percentile(returns,
95)) / \
np.abs(np.percentile(returns,
5))
def
capture(
returns,
factor_returns,
period=DAILY):
"""
Compute capture ratio.
Parameters
----------
returns : pd.Series or np.ndarray
Returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
factor_returns : pd.Series or np.ndarray
Noncumulative returns of the factor to which beta is
computed. Usually a benchmark such as the market.
- This is in the same style as returns.
period : str, optional
Defines the periodicity of the 'returns' data for purposes of
annualizing. Value ignored if `annualization` parameter is specified.
Defaults are::
'monthly':12
'weekly': 52
'daily': 252
Returns
-------
capture_ratio : float
Note
----
See http://www.investopedia.com/terms/u/up-market-capture-ratio.asp for
details.
"""
return (annual_return(returns,
period=period) /
annual_return(factor_returns,
period=period))
def
up_capture(
returns,
factor_returns, **
kwargs):
"""
Compute the capture ratio for periods when the benchmark return is positive
Parameters
----------
returns : pd.Series or np.ndarray
Returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
factor_returns : pd.Series or np.ndarray
Noncumulative returns of the factor to which beta is
computed. Usually a benchmark such as the market.
- This is in the same style as returns.
period : str, optional
Defines the periodicity of the 'returns' data for purposes of
annualizing. Value ignored if `annualization` parameter is specified.
Defaults are::
'monthly':12
'weekly': 52
'daily': 252
Returns
-------
up_capture : float
Note
----
See http://www.investopedia.com/terms/u/up-market-capture-ratio.asp for
more information.
"""
return up(returns, factor_returns,
function=capture, **kwargs)
def
down_capture(
returns,
factor_returns, **
kwargs):
"""
Compute the capture ratio for periods when the benchmark return is negative
Parameters
----------
returns : pd.Series or np.ndarray
Returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
factor_returns : pd.Series or np.ndarray
Noncumulative returns of the factor to which beta is
computed. Usually a benchmark such as the market.
- This is in the same style as returns.
period : str, optional
Defines the periodicity of the 'returns' data for purposes of
annualizing. Value ignored if `annualization` parameter is specified.
Defaults are::
'monthly':12
'weekly': 52
'daily': 252
Returns
-------
down_capture : float
Note
----
See http://www.investopedia.com/terms/d/down-market-capture-ratio.asp for
more information.
"""
return down(returns, factor_returns,
function=capture, **kwargs)
def
up_down_capture(
returns,
factor_returns, **
kwargs):
"""
Computes the ratio of up_capture to down_capture.
Parameters
----------
returns : pd.Series or np.ndarray
Returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
factor_returns : pd.Series or np.ndarray
Noncumulative returns of the factor to which beta is
computed. Usually a benchmark such as the market.
- This is in the same style as returns.
period : str, optional
Defines the periodicity of the 'returns' data for purposes of
annualizing. Value ignored if `annualization` parameter is specified.
Defaults are::
'monthly':12
'weekly': 52
'daily': 252
Returns
-------
up_down_capture : float
the updown capture ratio
"""
return (up_capture(returns, factor_returns, **kwargs) /
down_capture(returns, factor_returns, **kwargs))
def
up_alpha_beta(
returns,
factor_returns, **
kwargs):
"""
Computes alpha and beta for periods when the benchmark return is positive.
Parameters
----------
see documentation for `alpha_beta`.
Returns
-------
float
Alpha.
float
Beta.
"""
return up(returns, factor_returns,
function=alpha_beta_aligned, **kwargs)
def
down_alpha_beta(
returns,
factor_returns, **
kwargs):
"""
Computes alpha and beta for periods when the benchmark return is negative.
Parameters
----------
see documentation for `alpha_beta`.
Returns
-------
alpha : float
beta : float
"""
return down(returns, factor_returns,
function=alpha_beta_aligned, **kwargs)
def
roll_up_capture(
returns,
factor_returns,
window=
10, **
kwargs):
"""
Computes the up capture measure over a rolling window.
see documentation for :func:`~empyrical.stats.up_capture`.
(pass all args, kwargs required)
Parameters
----------
returns : pd.Series or np.ndarray
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
factor_returns : pd.Series or np.ndarray
Noncumulative returns of the factor to which beta is
computed. Usually a benchmark such as the market.
- This is in the same style as returns.
window : int, required
Size of the rolling window in terms of the periodicity of the data.
- eg window = 60, periodicity=DAILY, represents a rolling 60 day window
"""
return roll(returns, factor_returns,
window=window,
function=up_capture,
**kwargs)
def
roll_down_capture(
returns,
factor_returns,
window=
10, **
kwargs):
"""
Computes the down capture measure over a rolling window.
see documentation for :func:`~empyrical.stats.down_capture`.
(pass all args, kwargs required)
Parameters
----------
returns : pd.Series or np.ndarray
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
factor_returns : pd.Series or np.ndarray
Noncumulative returns of the factor to which beta is
computed. Usually a benchmark such as the market.
- This is in the same style as returns.
window : int, required
Size of the rolling window in terms of the periodicity of the data.
- eg window = 60, periodicity=DAILY, represents a rolling 60 day window
"""
return roll(returns, factor_returns,
window=window,
function=down_capture,
**kwargs)
def
roll_up_down_capture(
returns,
factor_returns,
window=
10, **
kwargs):
"""
Computes the up/down capture measure over a rolling window.
see documentation for :func:`~empyrical.stats.up_down_capture`.
(pass all args, kwargs required)
Parameters
----------
returns : pd.Series or np.ndarray
Daily returns of the strategy, noncumulative.
- See full explanation in :func:`~empyrical.stats.cum_returns`.
factor_returns : pd.Series or np.ndarray
Noncumulative returns of the factor to which beta is
computed. Usually a benchmark such as the market.
- This is in the same style as returns.
window : int, required
Size of the rolling window in terms of the periodicity of the data.
- eg window = 60, periodicity=DAILY, represents a rolling 60 day window
"""
return roll(returns, factor_returns,
window=window,
function=up_down_capture, **kwargs)
def
value_at_risk(
returns,
cutoff=
0.05):
"""
Value at risk (VaR) of a returns stream.
Parameters
----------
returns : pandas.Series or 1-D numpy.array
Non-cumulative daily returns.
cutoff : float, optional
Decimal representing the percentage cutoff for the bottom percentile of
returns. Defaults to 0.05.
Returns
-------
VaR : float
The VaR value.
"""
return np.percentile(returns,
100 * cutoff)
def
conditional_value_at_risk(
returns,
cutoff=
0.05):
"""
Conditional value at risk (CVaR) of a returns stream.
CVaR measures the expected single-day returns of an asset on that asset's
worst performing days, where "worst-performing" is defined as falling below
``cutoff`` as a percentile of all daily returns.
Parameters
----------
returns : pandas.Series or 1-D numpy.array
Non-cumulative daily returns.
cutoff : float, optional
Decimal representing the percentage cutoff for the bottom percentile of
returns. Defaults to 0.05.
Returns
-------
CVaR : float
The CVaR value.
"""
# PERF: Instead of using the 'value_at_risk' function to find the cutoff
# value, which requires a call to numpy.percentile, determine the cutoff
# index manually and partition out the lowest returns values. The value at
# the cutoff index should be included in the partition.
cutoff_index =
int((
len(returns) -
1) * cutoff)
return np.mean(np.partition(returns, cutoff_index)[:cutoff_index +
1])
SIMPLE_STAT_FUNCS = [
cum_returns_final,
annual_return,
annual_volatility,
sharpe_ratio,
calmar_ratio,
stability_of_timeseries,
max_drawdown,
omega_ratio,
sortino_ratio,
stats.skew,
stats.kurtosis,
tail_ratio,
cagr,
value_at_risk,
conditional_value_at_risk,
]
FACTOR_STAT_FUNCS = [
excess_sharpe,
alpha,
beta,
capture,
up_capture,
down_capture
]