"""Feed-forward neural network prediction for the shared-bike data set.

The script reads ``../train.csv`` and ``../test.csv``, converts the
``datetime`` strings to integers, standardises the columns, trains a small
fully connected regression network on the first nine columns to predict the
following three, prints the validation loss after every epoch, prints the
predictions for the test set and finally plots the validation-loss curve.
"""
import os
import time

import numpy as np
import pandas as pd
import torch
import torch.optim as optim

TRAIN_CSV = '../train.csv'
TEST_CSV = '../test.csv'

feature_number = 9      # number of input features (leading CSV columns)
out_prediction = 3      # number of targets (columns right after the features)
learning_rate = 0.01    # Adam step size
epochs = 10             # passes over the training split
batch_size = 10         # mini-batch size used for training
validation_size = 886   # samples held out for validation
seed = 2023             # makes the split and the initial weights repeatable


def datetime_convert(datetime):
    """Convert ``YYYY-MM-DD HH:00:00`` strings to ``YYYYMMDDHH`` integers.

    Dashes, the blank and the trailing ``:00:00`` are removed, so
    ``"2011-01-01 05:00:00"`` becomes ``2011010105``.

    Args:
        datetime: Iterable of timestamp strings.

    Returns:
        A new ``int64`` NumPy array.  The input is not modified, so
        read-only arrays (for example ``Series.values`` when pandas
        copy-on-write is active) are accepted.

    Raises:
        ValueError: If a stripped timestamp is not a plain integer, e.g.
            because its minutes or seconds are not ``00``.
    """
    return np.array(
        [
            int(str(stamp).replace("-", "").replace(" ", "")
                .replace(":00:00", ""))
            for stamp in datetime
        ],
        dtype=np.int64,
    )


def normalize(ndarry, mean=None, std=None):
    """Standardise *ndarry* to zero mean and unit standard deviation.

    Args:
        ndarry: Array-like offering ``mean()`` and ``std()`` (pandas Series
            or NumPy array).
        mean: Mean to subtract.  Defaults to ``ndarry.mean()``.  Pass the
            training mean to scale unseen data consistently.
        std: Standard deviation to divide by.  Defaults to ``ndarry.std()``.

    Returns:
        ``(ndarry - mean) / std``.  A scalar ``std`` of zero (constant
        column) is treated as one so the result is all zeros, not NaN.
    """
    mean = ndarry.mean() if mean is None else mean
    std = ndarry.std() if std is None else std
    if np.ndim(std) == 0 and std == 0:
        std = 1.0
    return (ndarry - mean) / std


class Net(torch.nn.Module):
    """Fully connected regression network with ReLU activations.

    Layout: ``input_layer`` -> ``hidden1`` -> ``hidden2`` applied
    ``n_layer`` times (the same weights are reused on every pass) ->
    ``predict``.  The output layer has no activation because this is a
    regression model.

    Args:
        n_feature: Number of input features; must match the data.
        n_output: Number of predicted values; must match the targets.
        n_neuron1: Width of the first hidden representation.
        n_neuron2: Width of the remaining hidden representations.
        n_layer: How many times ``hidden2`` is applied in ``forward``.
    """

    def __init__(self, n_feature, n_output, n_neuron1, n_neuron2, n_layer):
        super().__init__()
        self.n_feature = n_feature
        self.n_output = n_output
        self.n_neuron1 = n_neuron1
        self.n_neuron2 = n_neuron2
        self.n_layer = n_layer
        self.input_layer = torch.nn.Linear(self.n_feature, self.n_neuron1)
        self.hidden1 = torch.nn.Linear(self.n_neuron1, self.n_neuron2)
        self.hidden2 = torch.nn.Linear(self.n_neuron2, self.n_neuron2)
        self.predict = torch.nn.Linear(self.n_neuron2, self.n_output)

    def forward(self, x):
        """Return the network output for input batch *x*."""
        out = torch.relu(self.input_layer(x))
        out = torch.relu(self.hidden1(out))
        for _ in range(self.n_layer):
            out = torch.relu(self.hidden2(out))
        # Only n_feature and n_output are dictated by the data; the depth
        # and the layer widths can be tuned for the best validation loss.
        return self.predict(out)


def prepare_tensors(train_data, test_data):
    """Build standardised float32 tensors from the raw data frames.

    The ``datetime`` column of both frames is converted with
    ``datetime_convert``.  The first ``feature_number`` columns are the
    inputs and the next ``out_prediction`` columns of *train_data* are the
    targets.  Every column is standardised with the statistics of the
    training data; the test features reuse those statistics so that the
    network sees them on the scale it was trained on.

    Args:
        train_data: Frame holding feature columns followed by targets.
        test_data: Frame holding at least the feature columns.

    Returns:
        Tuple ``(X, Y, X_test)`` of ``torch.float32`` tensors.  ``Y`` is in
        standardised units.
    """
    train_data = train_data.copy()
    test_data = test_data.copy()
    train_data['datetime'] = datetime_convert(train_data['datetime'].values)
    test_data['datetime'] = datetime_convert(test_data['datetime'].values)

    train_columns = []
    test_columns = []
    for i in range(feature_number + out_prediction):
        column = train_data.iloc[:, i].astype(float)
        mean, std = column.mean(), column.std()
        train_columns.append(normalize(column, mean, std).to_numpy())
        if i < feature_number:
            test_column = test_data.iloc[:, i].astype(float)
            test_columns.append(
                normalize(test_column, mean, std).to_numpy())

    train_matrix = np.column_stack(train_columns)
    test_matrix = np.column_stack(test_columns)
    X = torch.tensor(train_matrix[:, :feature_number], dtype=torch.float32)
    Y = torch.tensor(train_matrix[:, feature_number:], dtype=torch.float32)
    X_test = torch.tensor(test_matrix, dtype=torch.float32)
    return X, Y, X_test


def split_dataset(dataset, n_validation=validation_size):
    """Randomly split *dataset* into training and validation subsets.

    Args:
        dataset: Sized torch dataset.
        n_validation: Number of samples to hold out for validation.

    Returns:
        ``[training_subset, validation_subset]`` from ``random_split``.

    Raises:
        ValueError: If no sample would be left for training.
    """
    n_training = len(dataset) - n_validation
    if n_training <= 0:
        raise ValueError(
            "dataset has {} samples, cannot hold out {}".format(
                len(dataset), n_validation))
    return torch.utils.data.random_split(dataset, [n_training, n_validation])


def train_model(net, training_data, x_val, y_val, n_epochs, lr):
    """Train *net* with Adam on MSE loss and track the validation loss.

    Args:
        net: Model to optimise in place.
        training_data: Iterable of ``(inputs, targets)`` mini-batches.
        x_val: Validation inputs.
        y_val: Validation targets.
        n_epochs: Number of passes over *training_data*.
        lr: Learning rate for Adam.

    Returns:
        List with one validation MSE (Python float) per epoch, computed
        over every validation sample and every output column.
    """
    optimizer = optim.Adam(net.parameters(), lr)
    criteon = torch.nn.MSELoss()
    average_losses = []
    for epoch in range(n_epochs):
        net.train()
        for data, target in training_data:
            loss = criteon(net(data), target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # The validation loss is what hyper-parameters (depth, widths)
        # should be tuned against.
        net.eval()
        with torch.no_grad():
            average_loss = criteon(net(x_val), y_val).item()
        average_losses.append(average_loss)
        print("epoch={},the average loss is {}".format(epoch, average_loss))
    return average_losses


def plot_losses(average_losses):
    """Plot the per-epoch validation loss and show the figure."""
    # Imported here so the data and model helpers above can be imported
    # on machines without matplotlib or a display.
    import matplotlib.pyplot as plt

    plt.xlim(0, len(average_losses))
    plt.ylim(0, 10)
    plt.plot(range(len(average_losses)), average_losses)
    plt.show()


def main():
    """Run the complete pipeline: load, train, validate, predict, plot."""
    train_data = pd.read_csv(TRAIN_CSV)
    test_data = pd.read_csv(TEST_CSV)
    X, Y, X_test = prepare_tensors(train_data, test_data)

    torch_train_dataset = torch.utils.data.TensorDataset(X, Y)
    torch.manual_seed(seed=seed)
    training, vertification = split_dataset(torch_train_dataset)
    training_data = torch.utils.data.DataLoader(
        training, batch_size=batch_size, shuffle=True)
    # Evaluate the held-out samples as one batch.
    held_out = list(vertification.indices)
    x_val, y_val = X[held_out], Y[held_out]

    # Depth and widths are fixed here; in practice they should be searched.
    net = Net(n_feature=feature_number,
              n_output=out_prediction,
              n_layer=1,
              n_neuron1=20,
              n_neuron2=20)
    average_losses = train_model(
        net, training_data, x_val, y_val, epochs, learning_rate)

    net.eval()
    with torch.no_grad():
        Y_test = net(X_test)  # predictions in standardised target units
    print(Y_test)
    plot_losses(average_losses)


if __name__ == "__main__":
    main()