文献来源
- Huang, W., et al. (2022). Convolutional neural network forecasting of European Union allowances futures using a novel unconstrained transformation method
- Appendix A. Supplementary data【数据+R+Python】
基本理论
多元线性回归(MLR)是建立自变量和因变量之间线性关系的最经典回归方法。MLR公式如下:
\hat{y}_t = β_1Y + β_2U_{t 1} + b + ε
- β_1∈^{4×16} :回归系数1
- β_2∈^{4×45} :回归系数2
- b∈^{4×1} :常数项
- ε∈^{4×1} : Е(ε) = (0,0,0,0)^T 的残差项(Montgomery等, 2021)
示例代码采用普通最小二乘法OLS对系数进行估计。
- Montgomery,D.C., Peck,E.A., & Vining,G.G. (2021). Introduction to Linear Regression Analysis. John Wiley Sons.
示例代码
setwd("D:\\Download\\1-s2.0-S0140988322002171-mmc1\\Simulation Code and Data")
rm(list=ls())
unconstrained_data<-read.csv("File_2_Data_After_Unconstrained_Transformation.csv", header=TRUE)
actual_data<-read.csv("File_1_Data_Before_Unconstrained_Transformation.csv", header=TRUE)
actual_Y<-actual_data[5:nrow(actual_data),]
# 构造输入数据集:EUA前4天的价格,前一天的解释变量
unconstrained_data_1<-unconstrained_data[4:(nrow(unconstrained_data)-1),2:5]
names(unconstrained_data_1)<-c("EUA_1_1","EUA_2_1","EUA_3_1","EUA_4_1")
unconstrained_data_2<-unconstrained_data[3:(nrow(unconstrained_data)-2),2:5]
names(unconstrained_data_2)<-c("EUA_1_2","EUA_2_2","EUA_3_2","EUA_4_2")
unconstrained_data_3<-unconstrained_data[2:(nrow(unconstrained_data)-3),2:5]
names(unconstrained_data_3
)<-c("EUA_1_3","EUA_2_3","EUA_3_3","EUA_4_3")
unconstrained_data_4<-unconstrained_data[1:(nrow(unconstrained_data)-4),2:5]
names(unconstrained_data_4)<-c("EUA_1_4","EUA_2_4","EUA_3_4","EUA_4_4")
explantory_variables<-unconstrained_data[4:(nrow(unconstrained_data)-1),6:50]
explantory_variables[,45]<-as.factor(explantory_variables[,45])
# 自变量
explantory_data<-cbind(unconstrained_data_1,unconstrained_data_2,unconstrained_data_3,
unconstrained_data_4,explantory_variables)
# 因变量
response_variable<-unconstrained_data[5:nrow(unconstrained_data),2:5]
dependentVariables<-response_variable
explainVariables<-explantory_data
# 训练集和测试集
dependentVariables_train<-response_variable[1:(nrow(response_variable)*0.8),]
explainVariables_train<-explantory_data[1:(nrow(explantory_data)*0.8),]
dependentVariables_test<-response_variable[(nrow(response_variable)*0.8+1):(nrow(response_variable)+1),]
explainVariables_test<-explantory_data[(nrow(explantory_data)*0.8+1):(nrow(explantory_data)+1),]
actual_Y<-actual_Y[(nrow(actual_Y)*0.8+1):(nrow(actual_Y)+1),]
# 用OLS预测y1 y2 y3 y4
y1<-cbind(dependentVariables_train[,1],explainVariables_train)
y1_fit<-lm(y1[,1]~.,data=explainVariables_train)
y1_pre<-predict(y1_fit,explainVariables_test)
y2<-cbind(dependentVariables_train[,2],explainVariables_train)
y2_fit<-lm(y2[,1]~.,data=explainVariables_train)
y2_pre<-predict(y2_fit,explainVariables_test)
y3<-cbind(dependentVariables_train[,3],explainVariables_train)
y3_fit<-lm(y3[,1]~.,data=explainVariables_train)
y3_pre<-predict(y3_fit,explainVariables_test)
y4<-cbind(dependentVariables_train[,4],explainVariables_train)
y4_fit<-lm(y4[,1]~.,data=explainVariables_train)
y4_pre<-predict(y4_fit,explainVariables_test)
# 逆变换
flow<-exp(y1_pre)
fhigh<-exp(y1_pre)+exp(y2_pre)
flambda1<-exp(y3_pre)/(exp(y3_pre)+1)
flambda2<-exp(y4_pre)/(exp(y4_pre)+1)
fopen<-flambda1*fhigh+(1-flambda1)*flow
fclose<-flambda2*fhigh+(1-flambda2)*flow
fcenter<-(fhigh+flow)/2
frange<-(fhigh-flow)/2
open_MAPE<-0
high_MAPE<-0
low_MAPE<-0
close_MAPE<-0
open_MAE<-0
high_MAE<-0
low_MAE<-0
close_MAE<-0
open_RMSE<-0
high_RMSE<-0
low_RMSE<-0
close_RMSE<-0
AR<-0
open_MAE_record<-matrix(nrow=length(fopen),ncol=1,data=NA)
high_MAE_record<-matrix(nrow=length(fopen),ncol=1,data=NA)
low_MAE_record<-matrix(nrow=length(fopen),ncol=1,data=NA)
close_MAE_record<-matrix(nrow=length(fopen),ncol=1,data=NA)
open_MAPE_record<-matrix(nrow=length(fopen),ncol=1,data=NA)
high_MAPE_record<-matrix(nrow=length(fopen),ncol=1,data=NA)
low_MAPE_record<-matrix(nrow=length(fopen),ncol=1,data=NA)
close_MAPE_record<-matrix(nrow=length(fopen),ncol=1,data=NA)
for(i in 1:length(fopen)){
#MAPE
open_MAPE<-open_MAPE+abs((fopen[i]-actual_Y[i,2])/actual_Y[i,2])
open_MAPE_record[i,1]<-abs((fopen[i]-actual_Y[i,2])/actual_Y[i,2])
high_MAPE<-high_MAPE+abs((fhigh[i]-actual_Y[i,3])/actual_Y[i,3])
high_MAPE_record[i,1]<-abs((fhigh[i]-actual_Y[i,3])/actual_Y[i,3])
low_MAPE<-low_MAPE+abs((flow[i]-actual_Y[i,4])/actual_Y[i,4])
low_MAPE_record[i,1]<-abs((flow[i]-actual_Y[i,4])/actual_Y[i,4])
close_MAPE<-close_MAPE+abs((fclose[i]-actual_Y[i,5])/actual_Y[i,5])
close_MAPE_record[i,1]<-abs((fclose[i]-actual_Y[i,5])/actual_Y[i,5])
#MAE
open_MAE<-open_MAE+abs((fopen[i]-actual_Y[i,2]))
open_MAE_record[i,1]<-abs((fopen[i]-actual_Y[i,2]))
high_MAE<-high_MAE+abs((fhigh[i]-actual_Y[i,3]))
high_MAE_record[i,1]<-abs((fhigh[i]-actual_Y[i,3]))
low_MAE<-low_MAE+abs((flow[i]-actual_Y[i,4]))
low_MAE_record[i,1]<-abs((flow[i]-actual_Y[i,4]))
close_MAE<-close_MAE+abs((fclose[i]-actual_Y[i,5]))
close_MAE_record[i,1]<-abs((fclose[i]-actual_Y[i,5]))
#RMSE
open_RMSE<-open_RMSE+abs((fopen[i]-actual_Y[i,2]))^2
high_RMSE<-high_RMSE+abs((fhigh[i]-actual_Y[i,3]))^2
low_RMSE<-low_RMSE+abs((flow[i]-actual_Y[i,4]))^2
close_RMSE<-close_RMSE+abs((fclose[i]-actual_Y[i,5]))^2
#AR
Y_u_fit<-fhigh[i]
Y_l_fit<-flow[i]
Y_upper_test<-actual_Y[i,3]
Y_lower_test<-actual_Y[i,4]
s=max(min(Y_u_fit,Y_upper_test)-max(Y_l_fit,Y_lower_test),0)
o=max(Y_u_fit,Y_upper_test)-min(Y_l_fit,Y_lower_test)-max(max(Y_l_fit,Y_lower_test)-min(Y_u_fit,Y_upper_test),0)
AR=AR+s/o
}
options(digits=5)
open_MAPE<-open_MAPE/length(fopen)
high_MAPE<-high_MAPE/length(fopen)
low_MAPE<-low_MAPE/length(fopen)
close_MAPE<-close_MAPE/length(fopen)
MLR_MAPE<-(open_MAPE+high_MAPE+low_MAPE+close_MAPE)/4
open_MAE<-open_MAE/length(fopen)
high_MAE<-high_MAE/length(fopen)
low_MAE<-low_MAE/length(fopen)
close_MAE<-close_MAE/length(fopen)
MLR_MAE<-(open_MAE+high_MAE+low_MAE+close_MAE)/4
open_RMSE<-(open_RMSE/length(fopen))^0.5
high_RMSE<-(high_RMSE/length(fopen))^0.5
low_RMSE<-(low_RMSE/length(fopen))^0.5
close_RMSE<-(close_RMSE/length(fopen))^0.5
MLR_RMSE<-((open_RMSE^2+high_RMSE^2+low_RMSE^2+close_RMSE^2)/4)^0.5
MLR_AR<-AR/length(fopen)
# 展示多元线性回归的结果:MAPE MAE RMSE AR
MLR_MAPE
MLR_MAE
MLR_RMSE
MLR_AR (完)