Merge branch 'master' of https://gitee.com/lzhcoooode/machine_learning_projects
This commit is contained in:
File diff suppressed because one or more lines are too long
Binary file not shown.
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -0,0 +1,134 @@
|
||||
import torch
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import os
|
||||
import time
|
||||
import torch.optim as optim
|
||||
import matplotlib.pyplot as plt
|
||||
|
||||
# Read the training and test tables that live one directory above the script.
train_data = pd.read_csv('../train.csv')
test_data = pd.read_csv('../test.csv')

# Expose every training column as a module-level array (the `.values` of
# the corresponding Series), in the same order as the original assignments.
(
    datetime,
    season,
    holiday,
    workingday,
    weather,
    temp,
    atemp,
    humidity,
    windspeed,
    casual,
    registered,
    count,
) = (
    train_data[column].values
    for column in (
        'datetime',
        'season',
        'holiday',
        'workingday',
        'weather',
        'temp',
        'atemp',
        'humidity',
        'windspeed',
        'casual',
        'registered',
        'count',
    )
)
|
||||
|
||||
'''
|
||||
转换datetime,去掉不必要的符号和0
|
||||
'''
|
||||
def datetime_convert(datetime):
    """Convert timestamp strings to compact integers, in place.

    For every element, all ``-`` and space characters are removed, each
    ``:00:00`` substring is dropped, and the remaining text is parsed with
    ``int``.  For example ``"2011-01-01 05:00:00"`` becomes ``2011010105``.

    Args:
        datetime: A mutable, indexable sequence of strings (for example an
            object-dtype NumPy array or a list).  It is modified in place.

    Returns:
        The same object that was passed in, now holding the integers.

    Raises:
        ValueError: If an element is not a valid integer literal after the
            substitutions.  Elements before it are already converted; the
            failing element is left untouched.
    """
    for position, stamp in enumerate(datetime):
        compact = stamp.replace("-", "").replace(" ", "").replace(":00:00", "")
        # Store only the final integer so a failed parse never leaves a
        # half-converted string behind.
        datetime[position] = int(compact)
    return datetime
|
||||
# Convert the training timestamps on a private, writable object-dtype copy so
# the result does not depend on whether `datetime` aliases the DataFrame's
# storage or is a read-only view.
datetime = datetime_convert(np.array(datetime, dtype=object))
train_data['datetime'] = datetime

# Assign the converted test timestamps explicitly.  Previously the return
# value was discarded and the column was re-assigned to itself, so the
# conversion only took effect if `.values` happened to alias the frame.
test_data['datetime'] = datetime_convert(
    test_data['datetime'].to_numpy(dtype=object, copy=True)
)
|
||||
'''
|
||||
标准化数据
|
||||
'''
|
||||
def normalize(ndarry):
    """Standardize data to zero mean and unit standard deviation.

    The mean and standard deviation come from the argument's own ``mean()``
    and ``std()`` methods, so the result follows that type's convention
    (pandas uses the sample standard deviation by default, NumPy the
    population standard deviation).

    Args:
        ndarry: Array-like object supporting arithmetic, ``mean()`` and
            ``std()`` (for example a NumPy array or a pandas Series).

    Returns:
        ``(ndarry - mean) / std``.  When the standard deviation is zero or
        undefined (constant or single-element input) only the centering is
        applied, so the result stays finite instead of becoming NaN/inf.
    """
    centered = ndarry - ndarry.mean()
    spread = ndarry.std()
    # `not spread > 0` is true for both zero and NaN.
    if not spread > 0:
        return centered
    return centered / spread
|
||||
|
||||
# Standardize the first 12 training columns (features and targets).
# Columns are cast to float and replaced wholesale instead of being written
# through `iloc`, which would push float values into integer columns in place.
# Test columns that share a name with a training column are scaled with the
# TRAINING mean/std so the features fed to the model at prediction time are on
# the same scale as the ones it was trained on.
for column in train_data.columns[:12]:
    train_values = train_data[column].astype(float)
    if column in test_data.columns:
        center = train_values.mean()
        spread = train_values.std()
        if not spread > 0:  # constant column: only center it
            spread = 1.0
        test_data[column] = (test_data[column].astype(float) - center) / spread
    train_data[column] = normalize(train_values)
|
||||
|
||||
|
||||
|
||||
|
||||
# The first 9 columns are the input features; columns 9..11 are the targets.
X = torch.tensor(train_data.iloc[:, :9].to_numpy().astype(float), dtype=torch.float32)
Y = torch.tensor(train_data.iloc[:, 9:12].to_numpy().astype(float), dtype=torch.float32)
X_test = torch.tensor(test_data.iloc[:, :9].to_numpy().astype(float), dtype=torch.float32)
torch_train_dataset = torch.utils.data.TensorDataset(X, Y)
batch_size = 10
torch.manual_seed(seed=2023)  # fixed seed so the random split is reproducible

# Hold out 886 samples for validation; the remaining 10000 are used to train.
training, vertification = torch.utils.data.random_split(torch_train_dataset, [10000, 886])
training_data = torch.utils.data.DataLoader(training, batch_size=batch_size, shuffle=True)
# The validation loader must wrap the held-out subset (it previously wrapped
# the training subset); shuffling is unnecessary for evaluation.
vertification_data = torch.utils.data.DataLoader(
    vertification, batch_size=batch_size, shuffle=False
)

feature_number = 9    # number of input features
out_prediction = 3    # number of regression outputs
learning_rate = 0.01  # optimizer learning rate
epochs = 10           # number of passes over the training data
|
||||
|
||||
|
||||
class Net(torch.nn.Module):
    """Fully connected regression network with ReLU activations.

    Layout: ``input_layer`` -> ReLU -> ``hidden1`` -> ReLU ->
    (``hidden2`` -> ReLU) repeated ``n_layer`` times -> ``predict``.
    The repeated stage applies the *same* ``hidden2`` layer each time, so
    its weights are shared across repetitions.

    Args:
        n_feature: Size of each input sample; must match the data.
        n_output: Size of each output sample; must match the targets.
        n_neuron1: Width of the first hidden representation.
        n_neuron2: Width of the remaining hidden representations.
        n_layer: How many times ``hidden2`` is applied (0 skips it).
    """

    def __init__(self, n_feature, n_output, n_neuron1, n_neuron2, n_layer):
        # Initialize the Module machinery before assigning any attribute.
        super().__init__()
        self.n_feature = n_feature
        self.n_output = n_output
        self.n_neuron1 = n_neuron1
        self.n_neuron2 = n_neuron2
        self.n_layer = n_layer
        self.input_layer = torch.nn.Linear(self.n_feature, self.n_neuron1)  # input layer
        self.hidden1 = torch.nn.Linear(self.n_neuron1, self.n_neuron2)  # first hidden layer
        self.hidden2 = torch.nn.Linear(self.n_neuron2, self.n_neuron2)  # repeated hidden layer
        self.predict = torch.nn.Linear(self.n_neuron2, self.n_output)  # output layer

    def forward(self, x):
        """Run the forward pass and return the raw regression outputs."""
        out = torch.relu(self.input_layer(x))
        out = torch.relu(self.hidden1(out))
        for _ in range(self.n_layer):
            out = torch.relu(self.hidden2(out))
        # Regression: no activation on the final layer.
        return self.predict(out)
|
||||
|
||||
# Network with fixed hidden sizes and a single repeated hidden layer; in
# practice these settings would be searched over rather than hard-coded.
net = Net(
    feature_number,
    out_prediction,
    n_neuron1=20,
    n_neuron2=20,
    n_layer=1,
)

# Adam updates the parameters; mean squared error is the regression loss.
optimizer = optim.Adam(params=net.parameters(), lr=learning_rate)
criteon = torch.nn.MSELoss()
|
||||
average_losses = []  # validation loss (plain float) recorded after every epoch

# Batched, unshuffled view of the held-out subset.  Built here from the subset
# itself so validation always runs on held-out samples.
validation_loader = torch.utils.data.DataLoader(
    vertification, batch_size=batch_size, shuffle=False
)

for epoch in range(epochs):  # passes over the whole training set
    net.train()  # training mode
    for data, target in training_data:
        logits = net(data)              # forward pass (predictions)
        loss = criteon(logits, target)  # training loss
        optimizer.zero_grad()           # clear stale gradients
        loss.backward()                 # backpropagate
        optimizer.step()                # update weights and biases

    # Validation: score every output of every held-out sample.  The previous
    # loop iterated single samples and kept only element 0 of each prediction
    # and target, so two of the three outputs were never evaluated.
    net.eval()  # evaluation mode
    weighted_loss = 0.0
    value_count = 0
    with torch.no_grad():  # no autograd graph is needed for evaluation
        for data, targets in validation_loader:
            # criteon returns a per-batch mean; weight it by the number of
            # values so a smaller final batch does not skew the average.
            weighted_loss += criteon(net(data), targets).item() * targets.numel()
            value_count += targets.numel()
    average_loss = weighted_loss / value_count if value_count else float('nan')
    average_losses.append(average_loss)
    print("epoch={},the average loss is {}".format(epoch, average_loss))
|
||||
|
||||
# Predict on the test features in evaluation mode and without building an
# autograd graph; calling the module (not .forward) also runs any hooks.
net.eval()
with torch.no_grad():
    Y_test = net(X_test)
print(Y_test)

# Plot the per-epoch validation loss.  Entries are converted to plain floats
# so the plot works whether the history holds floats or 0-d tensors.
xx = range(epochs)
yy = [float(loss) for loss in average_losses]
plt.xlim(0, epochs)
plt.ylim(0, 10)
plt.plot(xx, yy)
plt.show()
|
||||
File diff suppressed because one or more lines are too long
Binary file not shown.
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
Binary file not shown.
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
+1
-1
@@ -1,7 +1,7 @@
|
||||
田熠 强基数学001
|
||||
李毓曈 强基数学001
|
||||
马启越 强基数学001
|
||||
钟梦原 强基数学002
|
||||
钟孟原 强基数学002
|
||||
吴铭竑 强基数学001
|
||||
张君瑶 应数001
|
||||
李冬旭 应数001
|
||||
@@ -0,0 +1 @@
|
||||
https://tianchi.aliyun.com/dataset/127081
|
||||
@@ -4,5 +4,5 @@
|
||||
王政 统计001 组员
|
||||
李俊 统计001 组员
|
||||
王宪宝 统计001 组员
|
||||
韩飞澈 信计001 组员
|
||||
如斯太木·艾尼瓦尔 统计001 组员
|
||||
如斯太木·艾尼瓦尔 统计001 组员
|
||||
韩飞澈 统计001 组员
|
||||
@@ -1,10 +0,0 @@
|
||||
|
||||
15
|
||||
|
||||
蓝追航 强基数学001 组员
|
||||
陈江河 强基数学001 组长
|
||||
陈骞 强基数学002 组员
|
||||
洪坤 强基数学001 组员
|
||||
孙寿增 强基数学001 组员
|
||||
毕景堃 信计001 组员
|
||||
张未阳 统计001 组员
|
||||
@@ -0,0 +1,10 @@
|
||||
|
||||
第15组分工
|
||||
|
||||
1 陈江河
|
||||
2 陈骞
|
||||
3 洪坤
|
||||
4 蓝追航
|
||||
5 孙寿增
|
||||
6 毕景堃
|
||||
7 张未阳
|
||||
Binary file not shown.
Reference in New Issue
Block a user