python滑动窗口求回归——OLS和WLS

2023年3月28日源码参考

核心代码其实就一行

# Fit OLS: the dependent variable comes first, the regressor matrix x second
# (x, not its index, is what the model must be fitted on).
modle=regression.linear_model.OLS(temp_y,x).fit()

预备：

指定窗口大小，半衰期和数据长度，求一个权重序列：

Example:

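（原文此处为图片，未能显示。）例如 halflife=2、length=4 时，权重 $w_j = 0.5^{\,j/\mathrm{halflife}}$（$j=1,\dots,4$）依次约为 0.7071、0.5、0.3536、0.25。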
权重计算函数：


# n is the window size (kept for interface compatibility; the formula ignores it).
# j can be read as the number of days back from the current calculation date.
# The returned number is the computed weight.
def cf(j, n, halflife):
    """Return the exponential-decay weight for lag ``j``.

    The weight is ``0.5 ** (j / halflife)``, i.e. it halves every
    ``halflife`` steps of ``j``.

    Args:
        j: Lag; callers in this file pass 1, 2, ... up to the window length.
        n: Window size. Not used by the computation; accepted so existing
            call sites keep working.
        halflife: Number of lags after which the weight halves. Must be > 0.

    Returns:
        The weight as a float.

    Raises:
        ValueError: If ``halflife`` is not positive.
    """
    if halflife <= 0:
        raise ValueError("halflife must be positive")
    # The ** operator is used instead of math.pow so this function does not
    # rely on a `math` import, which is not present alongside this snippet.
    return 0.5 ** (j / halflife)

def weight_list(length, halflife, window_size):
    """Build the list of decay weights for lags 1 through ``length``.

    Entry ``k`` (0-based) is ``cf(k + 1, window_size, halflife)``, so the
    list is ordered by increasing lag.
    """
    return [cf(lag, window_size, halflife) for lag in range(1, length + 1)]

OLS版本：

import statsmodels.api as sm
from statsmodels import regression

# Sub-step: the regression inside one rolling window.
# Takes one window of the explanatory variable x, pairs it with the matching
# rows of the module-level y, and returns the regression slope (beta).
def linreg(x):
    """Fit OLS of the module-level ``y`` on ``x`` and return the slope.

    Args:
        x: Single-column DataFrame holding one window of the explanatory
            variable. Its index labels select the matching rows of ``y``.

    Returns:
        The coefficient on ``x`` (the beta), not the intercept.
    """
    # has_constant="add" always prepends the intercept column, even when the
    # window itself is constant, so the slope is reliably at position 1.
    design = sm.add_constant(x, has_constant="add")
    window_y = y.loc[design.index]  # y must share x's index labels
    # OLS takes the dependent variable first, then the regressors.
    fitted = regression.linear_model.OLS(window_y, design).fit()
    # Position 0 is the intercept added above; position 1 is the slope.
    # .iloc is used because params is labelled by column name.
    return fitted.params.iloc[1]


# 需要按列循环进行计算

#%% Required packages
from sklearn.linear_model import LinearRegression as LR  # linear-regression package
import pandas as pd

#%% Rolling regression
# y is the module-level dependent variable that linreg reads.
# Per the original author's note, Y is the market return with shape T x 1.
y = Y.to_frame()
# Per-stock daily return table (T x N per the original note).
# NOTE: this is an alias, not a copy, so the loop below overwrites
# close_ret_rate's columns in place with the rolling results.
df2 = close_ret_rate
window_size = 100
halflife = 25

for each_column in df2.columns:
    print(each_column)
    # raw=False hands each window to the callback as a Series, keeping the
    # index that linreg needs in order to select the matching rows of y.
    df2[each_column] = df2[each_column].rolling(window_size).apply(
        lambda x: linreg(pd.DataFrame(x)), raw=False
    )

WLS版本（如果要算加权最小二乘的滚动回归）：

# 滚动回归的另一个轮子,有效！！！
# 需要按列循环进行计算
#%%导入必须的包
from sklearn.linear_model import LinearRegression as LR #线性回归所需要的包
import pandas as pd

def cf(j, n, halflife):
    """Return the exponential-decay weight for lag ``j``.

    The weight is ``0.5 ** (j / halflife)``, i.e. it halves every
    ``halflife`` steps of ``j``.

    Args:
        j: Lag; callers in this file pass 1, 2, ... up to the window length.
        n: Window size. Not used by the computation; accepted so existing
            call sites keep working.
        halflife: Number of lags after which the weight halves. Must be > 0.

    Returns:
        The weight as a float.

    Raises:
        ValueError: If ``halflife`` is not positive.
    """
    if halflife <= 0:
        raise ValueError("halflife must be positive")
    # The ** operator is used instead of math.pow so this function does not
    # rely on a `math` import, which is not present alongside this snippet.
    return 0.5 ** (j / halflife)

def weight_list(halflife, window_size):
    """Build the decay weights for lags 1 through ``window_size``.

    Entry ``k`` (0-based) is ``cf(k + 1, window_size, halflife)``, so the
    list is ordered by increasing lag.
    """
    return [cf(lag, window_size, halflife) for lag in range(1, window_size + 1)]

#%% Rolling regression
y = Y.to_frame()  # module-level dependent variable read by rolling_regression
# NOTE: alias, not a copy; the loop below overwrites these columns in place.
df2 = close_ret_rate
window_size = 60
halflife = 30
# Bound to its own name so the weight_list function is not overwritten by
# its result (which made re-running this cell fail with "list is not callable").
weights = weight_list(halflife, window_size)
print(weights)
# weight_list orders its output by lag j = 1..window_size, largest weight
# first, whereas a rolling window is ordered oldest row first. Reversing
# lines the largest weight up with the newest row of each window.
chronological_weights = weights[::-1]


# Callback used by rolling(...).apply below.
def rolling_regression(df_x):
    """Fit WLS of the module-level ``y`` on one window and return the slope.

    Args:
        df_x: Single-column DataFrame with ``window_size`` rows holding one
            rolling window of the explanatory variable. Its index labels
            select the matching rows of ``y``.

    Returns:
        The coefficient on ``df_x`` (position 1, after the intercept).
    """
    # has_constant="add" always prepends the intercept column, even when the
    # window itself is constant, so the slope is reliably at position 1.
    design = sm.add_constant(df_x, has_constant="add")
    window_y = y.loc[design.index]  # y must share df_x's index labels
    fitted = regression.linear_model.WLS(
        window_y, design, weights=chronological_weights
    ).fit()
    # statsmodels results expose coefficients as .params (not sklearn's .coef_).
    return fitted.params.iloc[1]


for each_column in df2.columns:
    print(each_column)
    # The window length must equal len(weights), so window_size is reused
    # here instead of a separate literal. raw=False keeps each window's index.
    df2[each_column] = df2[each_column].rolling(window_size).apply(
        lambda x: rolling_regression(pd.DataFrame(x)), raw=False
    )

版权声明：本文为weixin_37726222原创文章，遵循 CC 4.0 BY-SA 版权协议，转载请附上原文出处链接和本声明。

原文链接：https://blog.csdn.net/weixin_37726222/article/details/126960486

上一篇文章：python 远程线程注入代码
下一篇文章：判断4位回文数-课后程序(Python程序开发案例教程-黑马程序员编著-第3章-课后作业)