文献来源
- Huang, W., et al. (2022). Convolutional neural network forecasting of European Union allowances futures using a novel unconstrained transformation method
- Appendix A. Supplementary data【数据+R+Python】
基本理论
Vapnik(1998)提出的支持向量机(SVM)是基于统计学习理论的一种新的通用学习方法。SVM可以有效地解决人工神经网络的缺点,如网络构建问题、过拟合问题和局部最小困境(Kavousi-Fard等,2014)。目前,支持向量机已经成功地应用于许多领域。一般采用支持向量机进行分类问题。支持向量回归(SVR)旨在处理一般预测问题,包括用于回归的SVM版本(Beyca等,2019)。
- Beyca, O. F., Ervural, B. C., Tatoglu, E., Ozuyar, P. G., & Zaim, S. (2019).Using machine learning tools for forecasting natural gas consumption in the province of Istanbul. Energy Economics.
- Kavousi-Fard, A., Samet, H., & Marzbani, F. (2014).A new hybrid Modified Firefly Algorithm and Support Vector Regression model for accurate Short Term Load Forecasting. Expert Systems with Applications, 41(13), 6047–6056.
- Vapnik,V.N. (1998). Statistical Learning Theory (Adaptive and Learning Systems for Signal Processing, Communications and Control). Wiley, New York.
示例代码
setwd("D:\\Download\\1-s2.0-S0140988322002171-mmc1\\Simulation Code and Data")
rm(list=ls())
library(e1071)
unconstrained_data<-read.csv("File_2_Data_After_Unconstrained_Transformation.csv", header=TRUE)
actual_data<-read.csv("File_1_Data_Before_Unconstrained_Transformation.csv", header=TRUE)
actual_Y<-actual_data[5:nrow(actual_data),]
# 构造输入数据集:EUA前4天的价格,前一天的解释变量
unconstrained_data_1<-unconstrained_data[4:(nrow(unconstrained_data)-1),2:5]
names(unconstrained_data_1)<-c("EUA_1_1","EUA_2_1","EUA_3_1","EUA_4_1")
unconstrained_data_2<-unconstrained_data[3:(nrow(unconstrained_data)-2),2:5]
names(unconstrained_data_2)<-c("EUA_1_2","EUA_2_2","EUA_3_2","EUA_4_2")
unconstrained_data_3<-unconstrained_data[2:(nrow(unconstrained_data)-3),2:5]
names(unconstrained_data_3)<-c("EUA_1_3","EUA_2_3","EUA_3_3","EUA_4_3")
unconstrained_data_4<-unconstrained_data[1:(nrow(unconstrained_data)-4),2:5]
names(unconstrained_data_4)<-c("EUA_1_4","EUA_2_4","EUA_3_4","EUA_4_4")
explantory_variables<-unconstrained_data[4:(nrow(unconstrained_data)-1),6:50]
explantory_variables[,45]<-as.factor(explantory_variables[,45])
# 自变量
explantory_data<-cbind(unconstrained_data_1,unconstrained_data_2,unconstrained_data_3,
unconstrained_data_4,explantory_variables)
# 因变量
response_variable<-unconstrained_data[5:nrow(unconstrained_data),2:5]
library(dummy)
explantory_data_dummy<-dummy(x=explantory_data)
for(i in 1:3){
explantory_data_dummy[,i]<-as.numeric(explantory_data_dummy[,i])
}
explantory_data<-cbind(explantory_data[,-61],explantory_data_dummy)
a<-explantory_data
b<-response_variable
response_variable_train<-b[1:(nrow(a)*0.8),]
explantory_data_train<-a[1:(nrow(b)*0.8),]
response_variable_test<-b[(nrow(a)*0.8+1):(nrow(a)+1),]
explantory_data_test<-a[(nrow(b)*0.8+1):(nrow(b)+1),]
actual_Y<-actual_Y[(nrow(actual_Y)*0.8+1):(nrow(actual_Y)+1),]
for(col in 1:4){
x<-explantory_data_train
y<-response_variable_train[,col]
model<-svm(x,y,type="nu-regression",kernel="linear",
gamma=if(is.vector(x)) 1 else 1/ ncol(x))
result<-predict(model,explantory_data_test)
if(col==1){
y1_pre<-result
}
if(col==2){
y2_pre<-result
}
if(col==3){
y3_pre<-result
}
if(col==4){
y4_pre<-result
}
}
flow<-exp(y1_pre)
fhigh<-exp(y1_pre)+exp(y2_pre)
flambda1<-exp(y3_pre)/(exp(y3_pre)+1)
flambda2<-exp(y4_pre)/(exp(y4_pre)+1)
fopen<-flambda1*fhigh+(1-flambda1)*flow
fclose<-flambda2*fhigh+(1-flambda2)*flow
fcenter<-(fhigh+flow)/2
frange<-(fhigh-flow)/2
open_MAPE<-0
high_MAPE<-0
low_MAPE<-0
close_MAPE<-0
open_MAE<-0
high_MAE<-0
low_MAE<-0
close_MAE<-0
open_RMSE<-0
high_RMSE<-0
low_RMSE<-0
close_RMSE<-0
RMSEH<-0
AR<-0
for(i in 1:length(fopen)){
#MAPE
open_MAPE<-open_MAPE+abs((fopen[i]-actual_Y[i,2])/actual_Y[i,2])
high_MAPE<-high_MAPE+abs((fhigh[i]-actual_Y[i,3])/actual_Y[i,3])
low_MAPE<-low_MAPE+abs((flow[i]-actual_Y[i,4])/actual_Y[i,4])
close_MAPE<-close_MAPE+abs((fclose[i]-actual_Y[i,5])/actual_Y[i,5])
#MAE
open_MAE<-open_MAE+abs((fopen[i]-actual_Y[i,2]))
high_MAE<-high_MAE+abs((fhigh[i]-actual_Y[i,3]))
low_MAE<-low_MAE+abs((flow[i]-actual_Y[i,4]))
close_MAE<-close_MAE+abs((fclose[i]-actual_Y[i,5]))
#RMSE
open_RMSE<-open_RMSE+abs((fopen[i]-actual_Y[i,2]))^2
high_RMSE<-high_RMSE+abs((fhigh[i]-actual_Y[i,3]))^2
low_RMSE<-low_RMSE+abs((flow[i]-actual_Y[i,4]))^2
close_RMSE<-close_RMSE+abs((fclose[i]-actual_Y[i,5]))^2
#RMSEH
center<-(actual_Y[i,3]+actual_Y[i,4])/2
range<-(actual_Y[i,3]-actual_Y[i,4])/2
RMSEH<-RMSEH+(abs(center-fcenter[i])+abs(range-frange[i]))^2
#AR
Y_u_fit<-fhigh[i]
Y_l_fit<-flow[i]
Y_upper_test<-actual_Y[i,3]
Y_lower_test<-actual_Y[i,4]
s=max(min(Y_u_fit,Y_upper_test)-max(Y_l_fit,Y_lower_test),0)
o=max(Y_u_fit,Y_upper_test)-min(Y_l_fit,Y_lower_test)-max(max(Y_l_fit,Y_lower_test)-min(Y_u_fit,Y_upper_test),0)
AR=AR+s/o
}
options(digits=5)
open_MAPE<-open_MAPE/length(fopen)
high_MAPE<-high_MAPE/length(fopen)
low_MAPE<-low_MAPE/length(fopen)
close_MAPE<-close_MAPE/length(fopen)
SVM_MAPE<-(open_MAPE+high_MAPE+low_MAPE+close_MAPE)/4
open_MAE<-open_MAE/length(fopen)
high_MAE<-high_MAE/length(fopen)
low_MAE<-low_MAE/length(fopen)
close_MAE<-close_MAE/length(fopen)
SVM_MAE<-(open_MAE+high_MAE+low_MAE+close_MAE)/4
open_RMSE<-(open_RMSE/length(fopen))^0.5
high_RMSE<-(high_RMSE/length(fopen))^0.5
low_RMSE<-(low_RMSE/length(fopen))^0.5
close_RMSE<-(close_RMSE/length(fopen))^0.5
SVM_RMSE<-((open_RMSE^2+high_RMSE^2+low_RMSE^2+close_RMSE^2)/4)^0.5
SVM_AR<-AR/length(fopen)
# 展示支持向量机的结果:MAPE MAE RMSE AR
SVM_MAPE
SVM_MAE
SVM_RMSE
SVM_AR
(完)