From 153ffb8370828a9589c20d71c5dcfd1e5e8ae9b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=83=AD=E4=BC=9F=E9=92=8A?= <13190875+guo-weizhao@user.noreply.gitee.com> Date: Sun, 16 Jul 2023 03:14:43 +0000 Subject: [PATCH] =?UTF-8?q?=E4=BD=9C=E4=B8=9A?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 郭伟钊 <13190875+guo-weizhao@user.noreply.gitee.com> --- .../第3组-夏添/homework/小作业水样.ipynb | 600 ++++++++++++++++++ .../第3组-夏添/homework/服装分类.ipynb | 214 +++++++ 2 files changed, 814 insertions(+) create mode 100644 1、人才招聘数据分析/第3组-夏添/homework/小作业水样.ipynb create mode 100644 1、人才招聘数据分析/第3组-夏添/homework/服装分类.ipynb diff --git a/1、人才招聘数据分析/第3组-夏添/homework/小作业水样.ipynb b/1、人才招聘数据分析/第3组-夏添/homework/小作业水样.ipynb new file mode 100644 index 0000000..32d38e9 --- /dev/null +++ b/1、人才招聘数据分析/第3组-夏添/homework/小作业水样.ipynb @@ -0,0 +1,600 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "1996849f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 161 entries, 0 to 160\n", + "Data columns (total 9 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 0 161 non-null float64\n", + " 1 1 161 non-null float64\n", + " 2 2 161 non-null float64\n", + " 3 3 161 non-null float64\n", + " 4 4 161 non-null float64\n", + " 5 5 161 non-null float64\n", + " 6 6 161 non-null float64\n", + " 7 7 161 non-null float64\n", + " 8 8 161 non-null float64\n", + "dtypes: float64(9)\n", + "memory usage: 11.4 KB\n", + "Try(\n", + " (unil1): Sequential(\n", + " (0): Conv1d(1, 16, kernel_size=(3,), stride=(1,), padding=(1,))\n", + " (1): ReLU()\n", + " (2): Conv1d(16, 32, kernel_size=(3,), stride=(1,), padding=(1,))\n", + " (3): ReLU()\n", + " (4): Conv1d(32, 16, kernel_size=(3,), stride=(1,), padding=(1,))\n", + " (5): ReLU()\n", + " (6): Flatten(start_dim=1, end_dim=-1)\n", + " (7): Linear(in_features=144, out_features=100, bias=True)\n", + " (8): ReLU()\n", + " (9): Dropout(p=0.5, inplace=False)\n", + " (10): Linear(in_features=100, out_features=100, bias=True)\n", + " (11): ReLU()\n", + " (12): Dropout(p=0.5, inplace=False)\n", + " (13): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n", + " (14): Linear(in_features=100, out_features=5, bias=True)\n", + " )\n", + ")\n", + "train,loss 0 -0.19760972261428833\n", + "train,acc 0 0.175\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "1.0 8 0.125\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 1 0.0\n", + "test,acc 0 0.1059190031152648\n", + "train,loss 1 -0.15465295314788818\n", + "train,acc 1 0.2\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\16560\\AppData\\Local\\Temp\\ipykernel_5848\\317761143.py:95: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requires_grad_(True), rather than torch.tensor(sourceTensor).\n", + " x_train=torch.tensor(x_train,dtype=torch.float)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 1 0.0\n", + "test,acc 1 0.11838006230529595\n", + "train,loss 2 -0.26933109760284424\n", + "train,acc 2 0.15625\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "1.0 8 0.125\n", + "1.0 8 0.125\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 1 0.0\n", + "test,acc 2 0.09657320872274143\n", + "train,loss 3 -0.21968130767345428\n", + "train,acc 3 0.2\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "1.0 8 0.125\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 1 0.0\n", + "test,acc 3 0.11838006230529595\n", + "train,loss 4 -0.24240297079086304\n", + "train,acc 4 0.26875\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "1.0 8 0.125\n", + "3.0 8 0.375\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 1 0.0\n", + "test,acc 4 0.1526479750778816\n", + "train,loss 5 -0.16728299856185913\n", + "train,acc 5 0.2375\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "0.0 1 0.0\n", + "test,acc 5 0.13707165109034267\n", + "train,loss 6 -0.19944357872009277\n", + "train,acc 6 0.24375\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "2.0 8 0.25\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 1 0.0\n", + "test,acc 6 0.14018691588785046\n", + "train,loss 7 -0.197749525308609\n", + "train,acc 7 0.225\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "1.0 8 0.125\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 1 0.0\n", + "test,acc 7 0.1308411214953271\n", + "train,loss 8 -0.17148229479789734\n", + "train,acc 8 0.20625\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "2.0 8 0.25\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 1 0.0\n", + "test,acc 8 0.12149532710280374\n", + "train,loss 9 -0.2207236886024475\n", + "train,acc 9 0.18125\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "0.0 8 0.0\n", + "1.0 8 0.125\n", + "1.0 8 0.125\n", + "1.0 8 0.125\n", + "0.0 8 0.0\n", + "0.0 1 0.0\n", + "test,acc 9 0.10903426791277258\n" + ] + } + ], + "source": [ + "# -*- coding: utf-8 -*-\n", + "\"\"\"\n", + "Created on Fri Jul 14 09:58:52 2023\n", + "\n", + "@author: 16560\n", + "\"\"\"\n", + "import os\n", + "import matplotlib.pyplot as plt\n", + "from PIL import ImageStat \n", + "from PIL import Image\n", + "import numpy as np\n", + "import pandas as pd\n", + "import torch\n", + "import torch.nn.functional as F\n", + "import torch.optim as optim\n", + "from torch.utils.data import DataLoader\n", + "from torch import nn\n", + "# 我们需要选取仅包括水样的图像部分来进行分析\n", + "data_root_path='C:/Users/16560/Desktop/小学期/images/'\n", + "# =============================================================================\n", + "# plt.title('水色样本 '+imgFile+' 分辨率为'+str(img.size)+\" 类别标签 \"+str(imgFile[9]))\n", + "# plt.show()\n", + "# =============================================================================\n", + "\n", + "# 加载图像统计信息模块(注:也可以直接通过颜色通道来计算)\n", + "\n", + "# 遍历全体图像进行快速检查\n", + "size = 100\n", + "imgWidth = [] # 图像宽度\n", + "imgHeight = [] # 图像高度\n", + "imgRrange = [] # 图像红色通道极差\n", + "imgGrange = [] # 图像绿色通道极差\n", + "imgBrange = [] # 图像蓝色通道极差\n", + "\n", + "newImgs = [] # 获得选取后的图像作为模型训练和验证数据\n", + "\n", + "imgFiles = os.listdir(data_root_path)\n", + "for imgFile in imgFiles:\n", + " img = Image.open(os.path.join(data_root_path,imgFile))\n", + " imgWidth.append(img.size[0])\n", + " imgHeight.append(img.size[1])\n", + " \n", + " # 获得图像中心区域大小为size的图像块\n", + " cx, cy = (int(i/2) for i in img.size)\n", + " box = (cx-50, cy-50, cx+50, cy+50)\n", + " region = img.crop(box)\n", + " \n", + " # 计算选取图像块的标准差\n", + " stat = ImageStat.Stat(region)\n", + " imgRrange.append(stat.extrema[0][1]-stat.extrema[0][0])\n", + " imgGrange.append(stat.extrema[1][1]-stat.extrema[1][0])\n", + " imgBrange.append(stat.extrema[2][1]-stat.extrema[2][0])\n", + " \n", + " newImgs.append(region)\n", + "\n", + "# 构建训练数据集和分类标签\n", + "data = []\n", + "dy = []\n", + "for i, img in enumerate(newImgs):\n", + " r, g, b = np.split(np.array(img), 3, axis = 2)\n", + " \n", + " #计算一阶矩\n", + " r_m1 = np.mean(r)\n", + " g_m1 = np.mean(g)\n", + " b_m1 = np.mean(b)\n", + " \n", + " #二阶矩\n", + " r_m2 = np.std(r)\n", + " g_m2 = np.std(g)\n", + " b_m2 = np.std(b)\n", + " \n", + " #三阶矩\n", + " r_m3 = np.mean(abs(r - r.mean())**3)**(1/3)\n", + " g_m3 = np.mean(abs(g - g.mean())**3)**(1/3)\n", + " b_m3 = np.mean(abs(b - b.mean())**3)**(1/3)\n", + " \n", + " # 构造新数据集\n", + " df = np.array([r_m1,g_m1,b_m1,r_m2,g_m2,b_m2,r_m3,g_m3,b_m3])\n", + " data.append(df)\n", + " \n", + " # 保存对应的分类标签\n", + " dy.append(int(imgFiles[i][0]))\n", + "\n", + "dy = np.array(dy)\n", + "data = pd.DataFrame(np.array(data))\n", + "data.info()\n", + "data.head()\n", + "\n", + "for i in range(dy.size):\n", + " dy[i]-=1\n", + " \n", + "y_train=torch.tensor(dy,dtype=torch.long)\n", + "x_train = torch.from_numpy(data.values)\n", + "x_train=x_train.reshape(161,1,9)\n", + "x_train=torch.tensor(x_train,dtype=torch.float)\n", + "\n", + "epochs=10\n", + "batch_size=8\n", + "\n", + "data_set=torch.utils.data.TensorDataset(x_train,y_train)\n", + "train_loader=DataLoader(dataset=data_set,batch_size=batch_size,shuffle=True)\n", + "test_loader=DataLoader(dataset=data_set,batch_size=batch_size,shuffle=True)\n", + "\n", + "class Try(nn.Module):\n", + " def __init__(self):\n", + " super(Try,self).__init__()\n", + " self.unil1=nn.Sequential(\n", + " nn.Conv1d(1,16,kernel_size=3,stride=1,padding=1),\n", + " nn.ReLU(),\n", + " nn.Conv1d(16,32,kernel_size=3,stride=1,padding=1),\n", + " nn.ReLU(),\n", + " nn.Conv1d(32,16,kernel_size=3,stride=1,padding=1),\n", + " nn.ReLU(),\n", + " nn.Flatten(),\n", + " nn.Linear(144,100),\n", + " nn.ReLU(),\n", + " nn.Dropout(p=0.5),\n", + " nn.Linear(100,100),\n", + " nn.ReLU(),\n", + " nn.Dropout(p=0.5),\n", + " nn.BatchNorm1d(100),\n", + " nn.Linear(100,5)\n", + " )\n", + " # tmp=torch.rand(5,6,5)\n", + " # out=self.model(tmp)\n", + " # print(out.shape)\n", + " # print(out)\n", + " def forward(self,x):\n", + " logits=self.unil1(x)\n", + " logits=F.softmax(logits,dim=1)\n", + " return logits\n", + " \n", + "criteon=nn.NLLLoss()\n", + "optimizer=optim.Adam(Try().parameters(),lr=1e-2,weight_decay=1e-2)\n", + "model=Try()\n", + "\n", + "print(model)\n", + "for epoch in range(epochs):\n", + " total_correct=0\n", + " total_num=0\n", + " model.train()\n", + " for batchidx,(train_data1,train_label1) in enumerate(train_loader):\n", + " if train_label1.shape[0]>=8:\n", + " logits=model(train_data1)\n", + " loss=criteon(logits,train_label1)\n", + " pred=logits.argmax(dim=1)\n", + " total_correct+=torch.eq(pred,train_label1).float().sum().item()\n", + " total_num+=train_data1.size(0)\n", + " optimizer.zero_grad()\n", + " loss.backward()\n", + " optimizer.step()\n", + " print('train,loss',epoch,loss.item())\n", + " acc=total_correct/total_num\n", + " print('train,acc',epoch,acc)\n", + " model.eval() \n", + " with torch.no_grad():\n", + " for test_data1,test_label1 in test_loader:\n", + " logits=model(test_data1)\n", + " pred=logits.argmax(dim=1)\n", + " sb=torch.eq(pred,test_label1)\n", + " sb1=torch.eq(pred,test_label1)\n", + " total_correct+=torch.eq(pred,test_label1).float().sum().item()\n", + " total_num+=test_data1.size(0)\n", + " #print(torch.eq(pred,test_label1))\n", + " print(torch.eq(pred,test_label1).float().sum().item(),test_data1.size(0),torch.eq(pred,test_label1).float().sum().item()/test_data1.size(0))\n", + " acc=total_correct/total_num\n", + " print('test,acc',epoch,acc)\n", + "\n", + "# =============================================================================\n", + "# def main():\n", + "# net=Try()\n", + "# tmp=torch.rand(16,1,9)\n", + "# out=net(tmp)\n", + "# print(out.shape)\n", + "# \n", + "# \n", + "# if __name__ == '__main__':\n", + "# main()\n", + "# =============================================================================\n", + "# =============================================================================\n", + "# name_dict={'1':1,'2':2,'3':3,'4':4,'5':5}\n", + "# data_root_path='C:/Users/16560/Desktop/小学期/images/'\n", + "# test_file_path = data_root_path + \"test.txt\" #测试文件路径\n", + "# train_file_path = data_root_path + \"train.txt\" # 训练文件路径\n", + "# name_data_list = {} # 记录每个类别有哪些图片 key:水果名称 value:图片路径构成的列表\n", + "# \n", + "# def save_train_test_file(path, name):\n", + "# if name not in name_data_list: # 该类别水果不在字典中,则新建一个列表插入字典\n", + "# img_list = []\n", + "# img_list.append(path) # 将图片路径存入列表\n", + "# name_data_list[name] = img_list # 将图片列表插入字典\n", + "# else: # 该类别水果在字典中,直接添加到列表\n", + "# name_data_list[name].append(path)\n", + "# \n", + "# # 遍历数据集下面每个子目录,将图片路径写入上面的字典\n", + "# dirs = os.listdir(data_root_path) # 列出数据集目下所有的文件和子目录\n", + "# imgs=os.listdir(data_root_path)\n", + "# for img in imgs:\n", + "# name=img[0]\n", + "# if name !='t':\n", + "# save_train_test_file(data_root_path+img, #拼图片完整路径\n", + "# name) # 以目录名称作为类别名称\n", + "# \n", + "# # 将name_data_list字典中的内容写入文件\n", + "# ## 清空训练集和测试集文件\n", + "# with open(test_file_path, \"w\") as f:\n", + "# pass\n", + "# \n", + "# with open(train_file_path, \"w\") as f:\n", + "# pass\n", + "# \n", + "# # 遍历字典,将字典中的内容写入训练集和测试集\n", + "# for name, img_list in name_data_list.items():\n", + "# i = 0\n", + "# num = len(img_list) # 获取每个类别图片数量\n", + "# print(\"%s: %d张\" % (name, num))\n", + "# # 写训练集和测试集\n", + "# for img in img_list:\n", + "# if i % 10 == 0: # 每10笔写一笔测试集\n", + "# with open(test_file_path, \"a\") as f: #以追加模式打开测试集文件\n", + "# line = \"%s\\t%d\\n\" % (img, name_dict[name]) # 拼一行\n", + "# f.write(line) # 写入文件\n", + "# else: # 训练集\n", + "# with open(train_file_path, \"a\") as f: #以追加模式打开测试集文件\n", + "# line = \"%s\\t%d\\n\" % (img, name_dict[name]) # 拼一行\n", + "# f.write(line) # 写入文件\n", + "# \n", + "# i += 1 # 计数器加1\n", + "# \n", + "# print(\"数据预处理完成.\")\n", + "# \n", + "# \n", + "# =============================================================================\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "85aad8da", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "8e9f8ce4", + "metadata": {}, + "outputs": [ + { + "ename": "AttributeError", + "evalue": "module 'matplotlib' has no attribute 'figure'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", + "Input \u001b[1;32mIn [3]\u001b[0m, in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mplt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfigure\u001b[49m(figsize\u001b[38;5;241m=\u001b[39m(\u001b[38;5;241m8\u001b[39m,\u001b[38;5;241m8\u001b[39m))\n", + "File \u001b[1;32mD:\\anaconda\\lib\\site-packages\\matplotlib\\_api\\__init__.py:222\u001b[0m, in \u001b[0;36mcaching_module_getattr..__getattr__\u001b[1;34m(name)\u001b[0m\n\u001b[0;32m 220\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m props:\n\u001b[0;32m 221\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m props[name]\u001b[38;5;241m.\u001b[39m\u001b[38;5;21m__get__\u001b[39m(instance)\n\u001b[1;32m--> 222\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m(\n\u001b[0;32m 223\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodule \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__module__\u001b[39m\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m has no attribute \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mname\u001b[38;5;132;01m!r}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", + "\u001b[1;31mAttributeError\u001b[0m: module 'matplotlib' has no attribute 'figure'" + ] + } + ], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f6fa60f2", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/1、人才招聘数据分析/第3组-夏添/homework/服装分类.ipynb b/1、人才招聘数据分析/第3组-夏添/homework/服装分类.ipynb new file mode 100644 index 0000000..6deee45 --- /dev/null +++ b/1、人才招聘数据分析/第3组-夏添/homework/服装分类.ipynb @@ -0,0 +1,214 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "8f9469b5", + "metadata": {}, + "outputs": [ + { + "ename": "FileNotFoundError", + "evalue": "[Errno 2] No such file or directory: 'fashion-mnist_train.csv'", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", + "Input \u001b[1;32mIn [1]\u001b[0m, in \u001b[0;36m\u001b[1;34m()\u001b[0m\n\u001b[0;32m 105\u001b[0m ave_acc\u001b[38;5;241m=\u001b[39mtal_acc\u001b[38;5;241m/\u001b[39mval_num\n\u001b[0;32m 106\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ave_acc,ave_loss\n\u001b[1;32m--> 108\u001b[0m x_train, y_train \u001b[38;5;241m=\u001b[39m \u001b[43mload_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtrain_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 109\u001b[0m x_validation, y_validation \u001b[38;5;241m=\u001b[39m load_data(test_name)\n\u001b[0;32m 110\u001b[0m train_num \u001b[38;5;241m=\u001b[39m y_train\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m]\n", + "Input \u001b[1;32mIn [1]\u001b[0m, in \u001b[0;36mload_data\u001b[1;34m(path)\u001b[0m\n\u001b[0;32m 20\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mload_data\u001b[39m(path):\n\u001b[0;32m 21\u001b[0m li \u001b[38;5;241m=\u001b[39m []\n\u001b[1;32m---> 22\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m,\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mr\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[0;32m 23\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m csv\u001b[38;5;241m.\u001b[39mreader(f):\n\u001b[0;32m 24\u001b[0m li\u001b[38;5;241m.\u001b[39mappend(i)\n", + "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'fashion-mnist_train.csv'" + ] + } + ], + "source": [ + "import csv\n", + "import torch\n", + "from torch import nn,squeeze\n", + "import torch.optim as optim\n", + "from torch.nn import functional as fun\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import time\n", + "import os\n", + "\n", + "train_name = \"fashion-mnist_train.csv\"\n", + "test_name = \"fashion-mnist_test.csv\"\n", + "#训练结果存储地址\n", + "save_path=\"./测试结果/\"\n", + "\n", + "# 自动导入所在路径的csv文件并训练\n", + "# 训练后的结果包括训练完成的模型和损失与准确率随轮数的变化图\n", + "# 训练结果将会存储在 [所在路径]/测试结果/[训练完成时日期] 下 \n", + "\n", + "def load_data(path):\n", + " li = []\n", + " with open(path,'r') as f:\n", + " for i in csv.reader(f):\n", + " li.append(i)\n", + " del li[0]\n", + " for i in range(len(li)):\n", + " li[i] = [int(data) for data in li[i]]\n", + " a = torch.tensor(li,dtype=torch.long).contiguous()\n", + " labels = a[:,0].clone().detach()\n", + " image = a[:,1:].clone().detach()\n", + " image = image.reshape(-1,1,28,28).float()\n", + " return image,labels\n", + "\n", + "class CNN(nn.Module):\n", + " def __init__(self):\n", + " super().__init__()\n", + " #卷积层\n", + " self.fea = nn.Sequential(\n", + " nn.Conv2d(1,16,5),\n", + " nn.ReLU(),\n", + " nn.MaxPool2d(2,stride = 2),\n", + " nn.Dropout(),\n", + " nn.Conv2d(16,32,5),\n", + " nn.ReLU(),\n", + " nn.MaxPool2d(2,stride = 2),\n", + " nn.Dropout(),\n", + " nn.Flatten() \n", + " )\n", + "\n", + " first_length = 32*4*4\n", + " self.fc1=nn.Linear(first_length,128)\n", + " self.fc2=nn.Linear(128,128)\n", + " self.fc3=nn.Linear(128,10)\n", + "\n", + " self.dr1=nn.Dropout()\n", + " self.dr2=nn.Dropout()\n", + "\n", + " self.bn=nn.BatchNorm1d(first_length)\n", + " # self.cons = ConSca()\n", + "\n", + "\n", + " def forward(self,x):\n", + " \n", + " x = self.fea(x)\n", + " x = self.bn(x)\n", + " x = fun.relu(self.fc1(x))\n", + " x = self.dr1(x)\n", + " x = fun.relu(self.fc2(x))\n", + " x = self.dr2(x)\n", + " x = fun.log_softmax(self.fc3(x),dim=1)\n", + " return x\n", + "\n", + "\n", + "lossFun=nn.NLLLoss()\n", + "epochs=10\n", + "batch_size=500\n", + "lr=1e-4\n", + "train_loss=[]\n", + "val_loss=[]\n", + "train_acc=[]\n", + "val_acc=[]\n", + "net=CNN() \n", + "optimizer=optim.Adam(net.parameters(),lr=lr)\n", + "\n", + "def verify(x_validation,y_validation,net=net,batch_size=batch_size):\n", + " #分批次地计算的损失和准确度以减轻内存负担\n", + " net.eval()\n", + " val_num=y_validation.shape[0]\n", + " tal_loss=0\n", + " tal_acc=0\n", + " for i in range(val_num//batch_size+1):\n", + " if (i+1)*batch_size<=val_num:\n", + " index=list(range(i*batch_size,(i+1)*batch_size))\n", + " elif i*batch_size