# Patch summary: route pyfolio's OLS fits through shared regression helpers.
#
# pyfolio/timeseries.py
#   * Adds regression() and rolling_regression(), thin wrappers around pd.ols.
#   * stability_of_timeseries, calc_multifactor, rolling_beta,
#     rolling_multifactor_beta, calc_alpha_beta and cone_rolling call the new
#     helpers instead of sm.OLS / sp.stats.linregress / hand-written loops.
#   * rolling_beta and rolling_multifactor_beta rename the keyword
#     `rolling_window` to `window` and gain `add_constant=True`;
#     calc_multifactor gains `add_constant=True`.
#   * Removes the `scipy.signal` and `datetime` imports.
# pyfolio/plotting.py
#   * Call sites pass `window=`; plot_rolling_beta selects the 'x' column of
#     the rolling_beta result.
# pyfolio/tests/test_timeseries.py
#   * `import unittest` replaces `from unittest import TestCase`; several
#     float assertions become assertAlmostEqual(..., DECIMAL_PLACES), with
#     DECIMAL_PLACES = 9.
#
# NOTE(review): renaming `rolling_window` to `window` breaks any caller that
#   passes the old keyword -- confirm no external callers rely on it.
# NOTE(review): plot_rolling_risk_factors plots the rolling_beta() results
#   directly while plot_rolling_beta selects ['x'] -- confirm whether the
#   factor plots should select the same column.
# NOTE(review): in cone_rolling the `else:` branch keeps the bare
#   `sm.OLS(perf_ts, ...)` statement (its result is not assigned) that the
#   earlier hunk removes -- presumably it should be dropped there too.
# NOTE(review): test_beta / test_multifactor_beta still use assertEqual on
#   `.values.tolist()[2]` of the new return value -- verify against pd.ols
#   output.
# NOTE(review): indentation and blank context lines in this patch are a
#   best-effort layout; some hunks change indentation only, and the
#   `@@ -512,16 +556,13 @@` hunk should be re-counted. Regenerate the patch
#   from the repository before using `git apply`.
diff --git a/pyfolio/plotting.py b/pyfolio/plotting.py
index 204b097d..52262f14 100644
--- a/pyfolio/plotting.py
+++ b/pyfolio/plotting.py
@@ -90,20 +90,20 @@ def plot_rolling_risk_factors(
     rolling_risk_multifactor = timeseries.rolling_multifactor_beta(
         returns,
         risk_factors.loc[:, ['SMB', 'HML', 'UMD']],
-        rolling_window=rolling_beta_window)
+        window=rolling_beta_window)
 
     rolling_beta_SMB = timeseries.rolling_beta(
         returns,
         risk_factors['SMB'],
-        rolling_window=rolling_beta_window)
+        window=rolling_beta_window)
     rolling_beta_HML = timeseries.rolling_beta(
         returns,
         risk_factors['HML'],
-        rolling_window=rolling_beta_window)
+        window=rolling_beta_window)
     rolling_beta_UMD = timeseries.rolling_beta(
         returns,
         risk_factors['UMD'],
-        rolling_window=rolling_beta_window)
+        window=rolling_beta_window)
 
     rolling_beta_SMB.plot(color='steelblue', alpha=0.7, ax=ax, **kwargs)
     rolling_beta_HML.plot(color='orangered', alpha=0.7, ax=ax, **kwargs)
@@ -608,10 +608,10 @@ def plot_rolling_beta(returns, benchmark_rets,
     ax.set_title("Rolling Portfolio Beta to S&P 500")
     ax.set_ylabel('Beta')
     rb_1 = timeseries.rolling_beta(
-        returns, benchmark_rets, rolling_window=rolling_beta_window * 2)
+        returns, benchmark_rets, window=rolling_beta_window * 2)['x']
     rb_1.plot(color='steelblue', lw=3, alpha=0.6, ax=ax, **kwargs)
     rb_2 = timeseries.rolling_beta(
-        returns, benchmark_rets, rolling_window=rolling_beta_window * 3)
+        returns, benchmark_rets, window=rolling_beta_window * 3)['x']
     rb_2.plot(color='grey', lw=3, alpha=0.4, ax=ax, **kwargs)
     ax.set_ylim((-2.5, 2.5))
     ax.axhline(rb_1.mean(), color='steelblue', linestyle='--', lw=3)
diff --git a/pyfolio/tests/test_timeseries.py b/pyfolio/tests/test_timeseries.py
index 3264f8ba..31487ca8 100644
--- a/pyfolio/tests/test_timeseries.py
+++ b/pyfolio/tests/test_timeseries.py
@@ -1,4 +1,4 @@
-from unittest import TestCase
+import unittest
 from nose_parameterized import parameterized
 
 import numpy as np
@@ -7,8 +7,9 @@
 
 from .. import timeseries
 
+DECIMAL_PLACES = 9
 
-class TestDrawdown(TestCase):
+class TestDrawdown(unittest.TestCase):
     px_list_1 = np.array(
         [100, 120, 100, 80, 70, 110, 180, 150]) / 100.  # Simple
     px_list_2 = np.array(
@@ -106,13 +107,13 @@ def test_gen_drawdown_table(
         (pd.Series(px_list_1 - 1, index=dt), -0.44000000000000011)
     ])
     def test_max_drawdown(self, df_rets, expected):
-        self.assertEqual(timeseries.max_drawdown(df_rets), expected)
+        self.assertAlmostEqual(timeseries.max_drawdown(df_rets), expected, DECIMAL_PLACES)
 
     @parameterized.expand([
         (pd.Series(px_list_1 - 1, index=dt), -0.44000000000000011)
     ])
     def test_max_drawdown_underwater(self, underwater, expected):
-        self.assertEqual(timeseries.max_drawdown(underwater), expected)
+        self.assertAlmostEqual(timeseries.max_drawdown(underwater), expected, DECIMAL_PLACES)
 
     @parameterized.expand([
         (pd.Series(px_list_1,
@@ -130,7 +131,7 @@ def test_top_drawdowns(self, df_rets, top, expected):
             expected)
 
 
-class TestCumReturns(TestCase):
+class TestCumReturns(unittest.TestCase):
     dt = pd.date_range('2000-1-3', periods=3, freq='D')
 
     @parameterized.expand([
@@ -144,7 +145,7 @@ def test_expected_result(self, input, expected, starting_value):
         pdt.assert_series_equal(output, expected)
 
 
-class TestVariance(TestCase):
+class TestVariance(unittest.TestCase):
 
     @parameterized.expand([
         (1e7, 0.5, 1, 1, -10000000.0)
@@ -159,7 +160,7 @@ def test_var_cov_var_normal(self, P, c, mu, sigma, expected):
             expected)
 
 
-class TestNormalize(TestCase):
+class TestNormalize(unittest.TestCase):
     dt = pd.date_range('2000-1-3', periods=8, freq='D')
     px_list = [1.0, 1.2, 1.0, 0.8, 0.7, 0.8, 0.8, 0.8]
 
@@ -171,7 +172,7 @@ def test_normalize(self, df, expected):
         self.assertTrue(timeseries.normalize(df).equals(expected))
 
 
-class TestAggregateReturns(TestCase):
+class TestAggregateReturns(unittest.TestCase):
     simple_rets = pd.Series(
         [0.1] * 3 + [0] * 497,
         pd.date_range(
@@ -192,7 +193,7 @@ def test_aggregate_rets(self, df_rets, convert_to, expected):
             expected)
 
 
-class TestStats(TestCase):
+class TestStats(unittest.TestCase):
     simple_rets = pd.Series(
         [0.1] * 3 + [0] * 497,
         pd.date_range(
@@ -224,18 +225,18 @@ def test_annual_ret(self, df_rets, style, expected):
         (simple_rets, 0.12271674212427248)
     ])
     def test_annual_volatility(self, df_rets, expected):
-        self.assertEqual(timeseries.annual_volatility(df_rets), expected)
+        self.assertAlmostEqual(timeseries.annual_volatility(df_rets), expected, DECIMAL_PLACES)
 
     @parameterized.expand([
         (simple_rets, 'calendar', 1.7112579454508172),
         (simple_rets, 'compound', 1.3297007080039505)
     ])
     def test_sharpe(self, df_rets, returns_style, expected):
-        self.assertEqual(
+        self.assertAlmostEqual(
             timeseries.sharpe_ratio(
                 df_rets,
                 returns_style=returns_style),
-            expected)
+            expected, DECIMAL_PLACES)
 
     @parameterized.expand([
         (simple_rets[:5], 2, '[nan, inf, inf, 11.224972160321828, inf]')
@@ -248,25 +249,25 @@ def test_sharpe_2(self, df_rets, rolling_sharpe_window, expected):
         (simple_rets, True, 0.010766923838471554)
     ])
     def test_stability_of_timeseries(self, df_rets, logValue, expected):
-        self.assertEqual(
+        self.assertAlmostEqual(
             timeseries.stability_of_timeseries(
                 df_rets,
                 logValue=logValue),
-            expected)
+            expected, DECIMAL_PLACES)
 
     @parameterized.expand([
         (simple_rets[:5], simple_benchmark[:5], 2, 8.024708101613483e-32)
     ])
-    def test_beta(self, df_rets, benchmark_rets, rolling_window, expected):
+    def test_beta(self, df_rets, benchmark_rets, window, expected):
         self.assertEqual(
             timeseries.rolling_beta(
                 df_rets,
                 benchmark_rets,
-                rolling_window=rolling_window).values.tolist()[2],
+                window=window).values.tolist()[2],
             expected)
 
 
-class TestMultifactor(TestCase):
+class TestMultifactor(unittest.TestCase):
     simple_rets = pd.Series(
         [0.1] * 3 + [0] * 497,
         pd.date_range(
@@ -300,16 +301,16 @@ def test_calc_multifactor(self, df_rets, factors, expected):
           0.002997302427814967])
     ])
     def test_multifactor_beta(
-            self, df_rets, benchmark_df, rolling_window, expected):
+            self, df_rets, benchmark_df, window, expected):
         self.assertEqual(
             timeseries.rolling_multifactor_beta(
                 df_rets,
                 benchmark_df,
-                rolling_window=rolling_window).values.tolist()[2],
+                window=window).values.tolist()[2],
             expected)
 
 
-class TestPerfStats(TestCase):
+class TestPerfStats(unittest.TestCase):
     simple_rets = pd.Series(
         [0.1] * 3 + [0] * 497,
         pd.date_range(
@@ -335,4 +336,4 @@ def test_perf_stats(
         self.assertEqual(timeseries.perf_stats(df_rets,
                                                returns_style=returns_style,
                                                return_as_dict=return_as_dict).values.tolist()[-2:],
-                         expected)
+            expected)
diff --git a/pyfolio/timeseries.py b/pyfolio/timeseries.py
index 04b645e3..9c84ef5b 100644
--- a/pyfolio/timeseries.py
+++ b/pyfolio/timeseries.py
@@ -22,12 +22,62 @@
 import numpy as np
 import scipy as sp
 import scipy.stats as stats
-import scipy.signal as signal
 from sklearn import preprocessing
 
 import statsmodels.api as sm
 
-import datetime
+
+
+def regression(y, x, add_constant=True):
+    """
+    Generalized regression calculation that
+    returns the ols model for situations where
+    other statistical values are needed.
+
+    Parameters
+    ----------
+    y : Series/DataFrame
+        dependent variable
+    x : Series/DataFrame
+        independent variable(s)
+    add_constant : boolean
+        if True a constant term is added
+        to the regression model.
+
+    returns
+    -------
+    pandas.stats.ols.OLS
+        regression model
+    """
+    return pd.ols(y=y, x=x, intercept=add_constant)
+
+
+def rolling_regression(y, x, window, add_constant=True):
+    """
+    Generalized rolling regression calculation that
+    returns the ols model for situations where
+    other statistical values are needed.
+
+    Parameters
+    ----------
+    y : Series/DataFrame
+        dependent variable
+    x : Series/DataFrame
+        independent variable(s)
+    window : int
+        lookback window length used in
+        the rolling regression
+    add_constant : boolean
+        if True a constant term is added
+        to the regression model.
+
+    returns
+    -------
+    pandas.stats.ols.OLS
+        regression model
+    """
+    return pd.ols(y=y, x=x, window=window,
+                  window_type='rolling', intercept=add_constant)
 
 
 def var_cov_var_normal(P, c, mu=0, sigma=1):
@@ -277,7 +327,8 @@ def omega_ratio(returns, annual_return_threshhold=0.0):
     returns : pd.Series
         Daily returns of the strategy, non-cumulative.
     annual_return_threshold : float, optional
-        Threshold over which to consider positive vs negative returns. For the ratio, it will be converted to a daily return and compared to returns.
+        Threshold over which to consider positive vs negative returns.
+        For the ratio, it will be converted to a daily return and compared to returns.
 
     Returns
     -------
@@ -384,12 +435,11 @@ def stability_of_timeseries(returns, logValue=True):
         df_cum_rets.values) if logValue else df_cum_rets.values
 
     len_returns = df_cum_rets.size
-    X = list(range(0, len_returns))
-    X = sm.add_constant(X)
+    X = pd.Series(range(len_returns), index=returns.index)
 
-    model = sm.OLS(temp_values, X).fit()
-
-    return model.rsquared
+    model = regression(temp_values, X, add_constant=True)
+    # TODO: should adjusted rsquared be used?
+    return model.r2
 
 
 def out_of_sample_vs_in_sample_returns_kde(
@@ -458,12 +508,12 @@ def out_of_sample_vs_in_sample_returns_kde(
         return np.nan
 
     kde_diff = sum(abs(scipy_kde_test - scipy_kde_train)) / \
-        (sum(scipy_kde_train) + sum(scipy_kde_test))
+    (sum(scipy_kde_train) + sum(scipy_kde_test))
 
     return kde_diff
 
 
-def calc_multifactor(returns, factors):
+def calc_multifactor(returns, factors, add_constant=True):
     """
     Computes multiple ordinary least squares linear fits, and returns
     fit parameters.
@@ -479,17 +529,11 @@ def calc_multifactor(returns, factors):
     pd.DataFrame
         Fit parameters.
     """
-
-    import statsmodels.api as sm
-    factors = factors.loc[returns.index]
-    factors = sm.add_constant(factors)
-    factors = factors.dropna(axis=0)
-    results = sm.OLS(returns[factors.index], factors).fit()
-
-    return results.params
+    model = regression(returns, factors, add_constant=add_constant)
+    return model.beta
 
 
-def rolling_beta(returns, benchmark_rets, rolling_window=63):
+def rolling_beta(returns, benchmark_rets, window=63, add_constant=True):
     """
     Determines the rolling beta of a strategy.
 
@@ -512,16 +556,13 @@ def rolling_beta(returns, benchmark_rets, rolling_window=63):
     See https://en.wikipedia.org/wiki/Beta_(finance) for more details.
 
     """
-    out = pd.Series(index=returns.index)
-    for beg, end in zip(returns.index[0:-rolling_window],
-                        returns.index[rolling_window:]):
-        out.loc[end] = calc_alpha_beta(returns.loc[beg:end],
-                                       benchmark_rets.loc[beg:end])[1]
+    model = rolling_regression(returns, benchmark_rets,
+                               window, add_constant=add_constant)
+    return model.beta
 
-    return out
-
-
-def rolling_multifactor_beta(returns, df_multi_factor, rolling_window=63):
+def rolling_multifactor_beta(returns, df_multi_factor,
+                             window=63, add_constant=True):
     """
     Determines the rolling beta of multiple factors.
 
@@ -543,16 +584,9 @@ def rolling_multifactor_beta(returns, df_multi_factor, rolling_window=63):
     -----
     See https://en.wikipedia.org/wiki/Beta_(finance) for more details.
     """
-
-    out = pd.DataFrame(columns=['const'] + list(df_multi_factor.columns),
-                       index=returns.index)
-
-    for beg, end in zip(returns.index[0:-rolling_window],
-                        returns.index[rolling_window:]):
-        out.loc[end] = calc_multifactor(returns.loc[beg:end],
-                                        df_multi_factor.loc[beg:end])
-
-    return out
+    model = rolling_regression(returns, df_multi_factor,
+                               window, add_constant=add_constant)
+    return model.beta
 
 
 def calc_alpha_beta(returns, benchmark_rets):
@@ -573,10 +607,8 @@ def calc_alpha_beta(returns, benchmark_rets):
     float
         Beta.
     """
-
-    ret_index = returns.index
-    beta, alpha = sp.stats.linregress(benchmark_rets.loc[ret_index].values,
-                                      returns.values)[:2]
+    model = regression(returns, benchmark_rets, add_constant=True)
+    beta, alpha = model.beta
 
     return alpha * 252, beta
 
@@ -767,7 +799,7 @@ def gen_drawdown_table(returns, top=10):
         df_drawdowns.loc[i, 'valley date'] = valley
         df_drawdowns.loc[i, 'recovery date'] = recovery
         df_drawdowns.loc[i, 'net drawdown in %'] = (
-            (df_cum.loc[peak] - df_cum.loc[valley]) / df_cum.loc[peak]) * 100
+        (df_cum.loc[peak] - df_cum.loc[valley]) / df_cum.loc[peak]) * 100
 
     df_drawdowns['peak date'] = pd.to_datetime(
         df_drawdowns['peak date'],
@@ -804,7 +836,7 @@ def rolling_sharpe(returns, rolling_sharpe_window):
     """
 
     return pd.rolling_mean(returns, rolling_sharpe_window) \
-        / pd.rolling_std(returns, rolling_sharpe_window) * np.sqrt(252)
+    / pd.rolling_std(returns, rolling_sharpe_window) * np.sqrt(252)
 
 
 def cone_rolling(
@@ -834,12 +866,10 @@ def cone_rolling(
 
     perf_ts = cum_returns(returns, 1)
 
-    X = list(range(0, perf_ts.size))
-    X = sm.add_constant(X)
-    sm.OLS(perf_ts, list(range(0, len(perf_ts))))
-    line_ols = sm.OLS(perf_ts.values, X).fit()
-    fit_line_ols_coef = line_ols.params[1]
-    fit_line_ols_inter = line_ols.params[0]
+    X = pd.Series(range(perf_ts.size), index=perf_ts.index)
+
+    line_ols = regression(perf_ts, X, add_constant=True)
+    fit_line_ols_coef, fit_line_ols_inter = line_ols.beta
 
     x_points = list(range(0, perf_ts.size))
     x_points = np.array(x_points) * fit_line_ols_coef + fit_line_ols_inter
@@ -870,21 +900,19 @@ def cone_rolling(
             line_ols_coef = fit_line_ols_coef
             line_ols_inter = fit_line_ols_inter
         else:
-            X = list(range(0, perf_ts.size))
-            X = sm.add_constant(X)
+            X = pd.Series(range(perf_ts.size), index=perf_ts.index)
             sm.OLS(perf_ts, list(range(0, len(perf_ts))))
-            line_ols = sm.OLS(perf_ts.values, X).fit()
-            line_ols_coef = line_ols.params[1]
-            line_ols_inter = line_ols.params[0]
+            line_ols = regression(perf_ts, X, add_constant=True)
+            line_ols_coef, line_ols_inter = line_ols.beta
 
         x_points = list(range(0, perf_ts.size))
         x_points = np.array(x_points) * line_ols_coef + \
-            line_ols_inter + oos_intercept_shift
+        line_ols_inter + oos_intercept_shift
 
         temp_line = x_points
         if update_std_oos_rolling:
             std_pct = np.sqrt(new_cone_day_scale_factor) * \
-                np.std(perf_ts.pct_change().dropna())
+            np.std(perf_ts.pct_change().dropna())
         else:
             std_pct = np.sqrt(new_cone_day_scale_factor) * warm_up_std_pct
 
@@ -914,7 +942,7 @@ def cone_rolling(
 
     x_points = list(range(perf_ts.size, perf_ts.size + extend_ahead_days))
     x_points = np.array(x_points) * line_ols_coef + line_ols_inter + \
-        oos_intercept_shift + future_cone_intercept_shift
+    oos_intercept_shift + future_cone_intercept_shift
     temp_line = x_points
     temp_sd_up = temp_line * (1 + num_stdev * std_pct)
     temp_sd_down = temp_line * (1 - num_stdev * std_pct)