In many algorithm competitions you run into time series that carry the same kind of signal but are produced by different entities, e.g. series from different locations: wind-farm power output, charging-pile electricity usage. A common pitfall in this scenario is to preprocess all the data uniformly and feed it to one model; the model then has a hard time learning what distinguishes one source from another. The idea explored here, and one of the main goals of this post, is to give the device at each location its own embedding: how do we model time-series data coming from devices at different positions?

GRU: a recurrent neural network module commonly used for processing sequential data.
Embedding: a PyTorch class (nn.Embedding) that maps discrete integer IDs to continuous vector representations.
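Before diving into the competition code, here is a minimal standalone sketch (the names and sizes are illustrative, not from the competition data) of what nn.Embedding does: each integer line/device ID indexes a learnable lookup table and comes back as a dense vector that trains jointly with the rest of the network.

import torch
import torch.nn as nn

# Hypothetical sketch: 9 distinct device/line IDs, each mapped to a learnable 2-dim vector.
embedding = nn.Embedding(num_embeddings=9, embedding_dim=2)
ids = torch.tensor([0, 3, 8])   # integer-encoded IDs, e.g. from sklearn's LabelEncoder
vectors = embedding(ids)        # shape: (3, 2)
print(vectors.shape)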
Data from the following competition is used as a working demo: the 2023中国华录杯数据湖算法大赛 (2023 China Hualu Cup Data Lake Algorithm Competition).

import package
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
#import tushare as ts
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from torch.utils.data import TensorDataset
from tqdm import tqdm
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import sys
import os
import gc
import argparse
import warnings
warnings.filterwarnings('ignore')

load data
class Config():
    # data_path = '../data/data1/train/power.csv'
    timestep = 14              # time-step length: how many past observations form the input window
    batch_size = 32            # batch size
    feature_size = 1           # number of features per time step (here 1: the daily passenger flow)
    hidden_size = 56           # hidden-layer size
    output_size = 1            # single-output task: predict one future value
    num_layers = 1             # number of GRU layers
    epochs = 10                # number of training epochs
    best_loss = float('inf')   # track the best validation loss; start at +inf so the first validation saves
    learning_rate = 0.00003    # learning rate
    model_name = 'lstm'        # model name
    save_path = './{}.pth'.format(model_name)  # path of the best checkpoint
config = Config()

train_df = pd.read_csv('../初赛数据/phase1_train.csv')
test_df = pd.read_csv('../初赛数据/phase1_test.csv')

labelEncoder = LabelEncoder()
train_df['line_label'] = labelEncoder.fit_transform(train_df['line'])
# labelEncoder.transform(test_df['line'])
train_df = train_df.sort_values(['line', 'date']).reset_index(drop=True)
train_df.line.unique()

array(['L01', 'L02', 'L03', 'L04', 'L05', 'L06', 'L08', 'L09', 'L10'], dtype=object)
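The commented transform line is where the test set would be encoded; a small sketch of how that reuse might look (this assumes test_df's line column only contains values seen during fit, otherwise transform raises a ValueError):

# Reuse the fitted encoder so train and test share the same integer mapping.
test_df['line_label'] = labelEncoder.transform(test_df['line'])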
Use the previous 14 days to predict the day 7 steps into the future; sliding the input window forward by one day slides the target forward by one day as well:

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]      ->  14 + 7
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] + 1  ->  14 + 7 + 1
...
#train_df.head()
his_pow_feats = []
for i in range(config.timestep):
    # within each line, shift passenger_flow back by 7..20 days, so every row
    # carries the 14 historical values that end 7 days before its own date
    train_df[f'shift_{7+i}'] = train_df.groupby('line_label')['passenger_flow'].shift(7+i)
    his_pow_feats.append(f'shift_{7+i}')
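A quick, optional sanity check of the alignment (column names follow the competition CSV loaded above): for any single line, shift_7 on a row should equal the passenger_flow recorded 7 rows earlier on that same line.

one_line = train_df[train_df['line'] == 'L01']
print(one_line[['date', 'passenger_flow', 'shift_7', 'shift_20']].head(25))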
train_df_drop_na = train_df[train_df[his_pow_feats].isna().sum(axis=1) == 0]

class MyDataSet(Dataset):
    def __init__(self, train_df_drop_na, his_pow_feats):
        self.train_df = train_df_drop_na.reset_index(drop=True)
        self.his_pow_feats = his_pow_feats

    def __len__(self):
        return len(self.train_df)

    def __getitem__(self, item):
        label = self.train_df.loc[item, 'passenger_flow']
        id_encoder = self.train_df.loc[item, 'line_label']
        his_feats_list = self.train_df.loc[item, self.his_pow_feats].values.tolist()
        return {
            'input_ids': torch.tensor(id_encoder, dtype=torch.long),
            'his_feats': torch.as_tensor(his_feats_list, dtype=torch.float32).unsqueeze(-1),  # (timestep, 1)
            'labels': torch.tensor(label, dtype=torch.float32),
        }
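A quick usage check of the dataset class, mainly to confirm the tensor shapes the model will receive:

ds = MyDataSet(train_df_drop_na, his_pow_feats)
sample = ds[0]
print(sample['his_feats'].shape)   # torch.Size([14, 1]): timestep steps, 1 feature each
print(sample['input_ids'], sample['labels'])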
RANDOM_SEED = 1023
df_train, df_test = train_test_split(train_df_drop_na, test_size=0.2, random_state=RANDOM_SEED)
df_val, df_test = train_test_split(df_test, test_size=0.5, random_state=RANDOM_SEED)
df_train.shape, df_val.shape, df_test.shape

def create_data_loader(train_df_drop_na, his_pow_feats, batch_size=32):
    ds = MyDataSet(train_df_drop_na, his_pow_feats)
    return DataLoader(ds, batch_size=batch_size)
BATCH_SIZE = 32

train_data_loader = create_data_loader(df_train, his_pow_feats=his_pow_feats, batch_size=BATCH_SIZE)
val_data_loader = create_data_loader(df_val, his_pow_feats=his_pow_feats, batch_size=BATCH_SIZE)
test_data_loader = create_data_loader(df_test, his_pow_feats=his_pow_feats, batch_size=BATCH_SIZE)
# train_df[cols]
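Note that DataLoader defaults to shuffle=False, so training batches arrive in sorted line/date order. A possible variant (my suggestion, not part of the original code) is to shuffle only the training loader:

train_data_loader = DataLoader(MyDataSet(df_train, his_pow_feats), batch_size=BATCH_SIZE, shuffle=True)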
# 7. Define the GRU network
class GRUModel(nn.Module):
    def __init__(self, feature_size, hidden_size, num_layers, output_size):
        super(GRUModel, self).__init__()
        self.hidden_size = hidden_size  # hidden-layer size
        self.num_layers = num_layers    # number of GRU layers
        # feature_size is the feature dimension, i.e. the number of features per time step (1 here)
        self.gru = nn.GRU(feature_size, hidden_size, num_layers,
                          batch_first=True, bidirectional=True)
        self.layer_norm = nn.LayerNorm(hidden_size * 2)
        # fc input: 2 * hidden_size from the bidirectional GRU plus 2 embedding dims
        self.fc = nn.Linear(hidden_size * 2 + 2, output_size)
        self.embedding = nn.Embedding(9, 2)  # 9 lines, each with a 2-dim embedding

    def forward(self, x, id_label, hidden=None):
        batch_size = x.shape[0]  # x: (batch, time_step, feature_size)
        # initial hidden state: (num_directions * num_layers, batch, hidden_size)
        h_0 = x.data.new(2 * self.num_layers, batch_size, self.hidden_size).fill_(0).float()
        if hidden is not None:
            h_0 = hidden
        # GRU pass
        output, hidden = self.gru(x, h_0)
        output = self.layer_norm(output)
        last_output = output[:, -1, :]     # keep only the last time step
        embed = self.embedding(id_label)   # (batch, 2)
        concatenated = torch.cat((embed, last_output), dim=1)
        # fully connected layer; output shape: (batch, output_size)
        output = self.fc(concatenated)
        return output
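A dummy forward pass is a cheap way to confirm the concatenation sizes line up (2 * hidden_size from the bidirectional GRU plus the 2 embedding dims must match the nn.Linear input):

# Shape check with a random batch: 4 samples, 14 time steps, 1 feature.
m = GRUModel(config.feature_size, config.hidden_size, config.num_layers, config.output_size)
dummy_x = torch.randn(4, config.timestep, config.feature_size)
dummy_ids = torch.randint(0, 9, (4,))
print(m(dummy_x, dummy_ids).shape)  # torch.Size([4, 1])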
model = GRUModel(config.feature_size, config.hidden_size, config.num_layers, config.output_size)  # instantiate the GRU network
loss_function = nn.L1Loss()  # MAE loss

# class MAPELoss(nn.Module):
#     def __init__(self):
#         super(MAPELoss, self).__init__()
#
#     def forward(self, y_pred, y_true):
#         epsilon = 1e-8  # small constant to avoid division by zero
#         absolute_error = torch.abs(y_true - y_pred)
#         relative_error = absolute_error / (torch.abs(y_true) + epsilon)
#         mape = torch.mean(relative_error) * 100
#         return mape
# loss_function = MAPELoss()

optimizer = torch.optim.AdamW(model.parameters(), lr=0.01)  # optimizer
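If the competition is scored on MAPE, the commented class above can be swapped in as the training loss; a toy check of its arithmetic (my own example values):

# mape = MAPELoss()
# print(mape(torch.tensor([90.0]), torch.tensor([100.0])))  # tensor(10.): |100-90|/100 * 100 = 10%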
from tqdm import tqdm

# 8. Model training
for epoch in range(config.epochs):
    model.train()
    running_loss = 0
    train_bar = tqdm(train_data_loader)  # progress bar
    for data in train_bar:
        x_train, y_train = data['his_feats'], data['labels']  # unpack features and targets
        optimizer.zero_grad()
        y_train_pred = model(x_train, data['input_ids'])
        loss = loss_function(y_train_pred, y_train.reshape(-1, 1))
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        train_bar.desc = 'train epoch[{}/{}] loss:{:.3f}'.format(epoch + 1, config.epochs, loss)

    # validation
    model.eval()
    test_loss = 0
    with torch.no_grad():
        test_bar = tqdm(val_data_loader)
        for data in test_bar:
            x_test, y_test = data['his_feats'], data['labels']
            y_test_pred = model(x_test, data['input_ids'])
            test_loss += loss_function(y_test_pred, y_test.reshape(-1, 1)).item()

    # save the checkpoint whenever validation loss improves
    if test_loss < config.best_loss:
        config.best_loss = test_loss
        torch.save(model.state_dict(), config.save_path)

print('Finished Training')
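After training, the saved checkpoint can be reloaded for inference; a minimal sketch along the lines of the loop above (it assumes the checkpoint at config.save_path exists, i.e. validation improved at least once):

model.load_state_dict(torch.load(config.save_path))
model.eval()
preds = []
with torch.no_grad():
    for data in test_data_loader:
        pred = model(data['his_feats'], data['input_ids'])  # (batch, 1)
        preds.append(pred.squeeze(-1))
preds = torch.cat(preds)
print(preds.shape)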