平时作业

Signed-off-by: 吴沂钊 <13190667+Yizhao_Wu4926@user.noreply.gitee.com>
This commit is contained in:
吴沂钊
2023-07-15 14:50:33 +00:00
committed by Gitee
parent b9d432a4ea
commit 29098aca79
3 changed files with 1442 additions and 0 deletions
+568
View File
@@ -0,0 +1,568 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 10,
"id": "f6e7da09-3bbb-4298-9a81-d3b09d8e6b83",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"['images/1_1.jpg',\n",
" 'images/1_10.jpg',\n",
" 'images/1_11.jpg',\n",
" 'images/1_12.jpg',\n",
" 'images/1_13.jpg',\n",
" 'images/1_14.jpg',\n",
" 'images/1_15.jpg',\n",
" 'images/1_16.jpg',\n",
" 'images/1_17.jpg',\n",
" 'images/1_18.jpg',\n",
" 'images/1_19.jpg',\n",
" 'images/1_2.jpg',\n",
" 'images/1_20.jpg',\n",
" 'images/1_21.jpg',\n",
" 'images/1_22.jpg',\n",
" 'images/1_23.jpg',\n",
" 'images/1_24.jpg',\n",
" 'images/1_25.jpg',\n",
" 'images/1_26.jpg',\n",
" 'images/1_27.jpg',\n",
" 'images/1_28.jpg',\n",
" 'images/1_3.jpg',\n",
" 'images/1_30.jpg',\n",
" 'images/1_31.jpg',\n",
" 'images/1_32.jpg',\n",
" 'images/1_33.jpg',\n",
" 'images/1_34.jpg',\n",
" 'images/1_35.jpg',\n",
" 'images/1_36.jpg',\n",
" 'images/1_37.jpg',\n",
" 'images/1_38.jpg',\n",
" 'images/1_39.jpg',\n",
" 'images/1_4.jpg',\n",
" 'images/1_40.jpg',\n",
" 'images/1_41.jpg',\n",
" 'images/1_42.jpg',\n",
" 'images/1_43.jpg',\n",
" 'images/1_44.jpg',\n",
" 'images/1_45.jpg',\n",
" 'images/1_46.jpg',\n",
" 'images/1_47.jpg',\n",
" 'images/1_48.jpg',\n",
" 'images/1_49.jpg',\n",
" 'images/1_5.jpg',\n",
" 'images/1_51.jpg',\n",
" 'images/1_6.jpg',\n",
" 'images/1_7.jpg',\n",
" 'images/1_8.jpg',\n",
" 'images/1_9.jpg',\n",
" 'images/2_1.jpg',\n",
" 'images/2_10.jpg',\n",
" 'images/2_12.jpg',\n",
" 'images/2_13.jpg',\n",
" 'images/2_14.jpg',\n",
" 'images/2_15.jpg',\n",
" 'images/2_17.jpg',\n",
" 'images/2_18.jpg',\n",
" 'images/2_19.jpg',\n",
" 'images/2_2.jpg',\n",
" 'images/2_20.jpg',\n",
" 'images/2_21.jpg',\n",
" 'images/2_22.jpg',\n",
" 'images/2_23.jpg',\n",
" 'images/2_24.jpg',\n",
" 'images/2_26.jpg',\n",
" 'images/2_27.jpg',\n",
" 'images/2_28.jpg',\n",
" 'images/2_29.jpg',\n",
" 'images/2_3.jpg',\n",
" 'images/2_30.jpg',\n",
" 'images/2_31.jpg',\n",
" 'images/2_32.jpg',\n",
" 'images/2_33.jpg',\n",
" 'images/2_34.jpg',\n",
" 'images/2_35.jpg',\n",
" 'images/2_36.jpg',\n",
" 'images/2_37.jpg',\n",
" 'images/2_38.jpg',\n",
" 'images/2_39.jpg',\n",
" 'images/2_4.jpg',\n",
" 'images/2_40.jpg',\n",
" 'images/2_41.jpg',\n",
" 'images/2_42.jpg',\n",
" 'images/2_43.jpg',\n",
" 'images/2_44.jpg',\n",
" 'images/2_5.jpg',\n",
" 'images/2_6.jpg',\n",
" 'images/2_7.jpg',\n",
" 'images/2_8.jpg',\n",
" 'images/2_9.jpg',\n",
" 'images/3_10.jpg',\n",
" 'images/3_11.jpg',\n",
" 'images/3_12.jpg',\n",
" 'images/3_13.jpg',\n",
" 'images/3_14.jpg',\n",
" 'images/3_15.jpg',\n",
" 'images/3_16.jpg',\n",
" 'images/3_17.jpg',\n",
" 'images/3_18.jpg',\n",
" 'images/3_19.jpg',\n",
" 'images/3_20.jpg',\n",
" 'images/3_21.jpg',\n",
" 'images/3_22.jpg',\n",
" 'images/3_23.jpg',\n",
" 'images/3_25.jpg',\n",
" 'images/3_26.jpg',\n",
" 'images/3_27.jpg',\n",
" 'images/3_28.jpg',\n",
" 'images/3_29.jpg',\n",
" 'images/3_3.jpg',\n",
" 'images/3_30.jpg',\n",
" 'images/3_31.jpg',\n",
" 'images/3_32.jpg',\n",
" 'images/3_34.jpg',\n",
" 'images/3_38.jpg',\n",
" 'images/3_40.jpg',\n",
" 'images/3_42.jpg',\n",
" 'images/3_43.jpg',\n",
" 'images/3_44.jpg',\n",
" 'images/3_45.jpg',\n",
" 'images/3_46.jpg',\n",
" 'images/3_47.jpg',\n",
" 'images/3_48.jpg',\n",
" 'images/3_49.jpg',\n",
" 'images/3_5.jpg',\n",
" 'images/3_50.jpg',\n",
" 'images/3_51.jpg',\n",
" 'images/3_52.jpg',\n",
" 'images/3_53.jpg',\n",
" 'images/3_55.jpg',\n",
" 'images/3_56.jpg',\n",
" 'images/3_57.jpg',\n",
" 'images/3_58.jpg',\n",
" 'images/3_59.jpg',\n",
" 'images/3_65.jpg',\n",
" 'images/3_66.jpg',\n",
" 'images/3_7.jpg',\n",
" 'images/3_71.jpg',\n",
" 'images/3_72.jpg',\n",
" 'images/3_73.jpg',\n",
" 'images/3_74.jpg',\n",
" 'images/3_75.jpg',\n",
" 'images/3_76.jpg',\n",
" 'images/3_77.jpg',\n",
" 'images/3_78.jpg',\n",
" 'images/3_8.jpg',\n",
" 'images/3_9.jpg',\n",
" 'images/4_10.jpg',\n",
" 'images/4_11.jpg',\n",
" 'images/4_13.jpg',\n",
" 'images/4_14.jpg',\n",
" 'images/4_21.jpg',\n",
" 'images/4_6.jpg',\n",
" 'images/4_7.jpg',\n",
" 'images/4_9.jpg',\n",
" 'images/5_1.jpg',\n",
" 'images/5_2.jpg',\n",
" 'images/5_3.jpg',\n",
" 'images/5_4.jpg',\n",
" 'images/5_5.jpg',\n",
" 'images/5_6.jpg']"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import re\n",
"import numpy as np\n",
"import os\n",
"import cv2\n",
"import matplotlib.pyplot as plt\n",
"\n",
"plt.rcParams['font.sans-serif'] = ['SimHei']\n",
"plt.rcParams['axes.unicode_minus'] = False\n",
"\n",
"def getimgnames(path=None):\n",
" \"\"\"\n",
" 获取指定文件夹中的JPG图片名称(含路径)\n",
" :param path: 指定文件夹\n",
" :return: path中的所有JPG图片名称(含路径,例如:./path/image1.jpg\n",
" \"\"\"\n",
" imgnames = []\n",
" filenames = os.listdir(path) # 获取path中的所有文件名\n",
" for i in filenames:\n",
" if re.findall('\\.jpg$', i) != []: # 在所有文件名中找出JPG图片名称\n",
" imgnames.append(os.path.join(path, i)) # 将图片名称和路径合并、保存\n",
" return imgnames\n",
"\n",
"imglist = getimgnames('images/')\n",
"imglist"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "43738e00-e1fa-4d6f-b08c-1e64662a5ce0",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"'\\ndef cut_image(img, imgFile):\\n cx = int(np.size(img, 1))/2\\n cy = int(np.size(img, 0))/2\\n \\n plt.figure(figsize=(8,8))\\n plt.imshow(img)\\n\\n plt.plot([cx-50, cx+50], [cy+50, cy+50], \\'r\\', linewidth=2)\\n plt.plot([cx+50, cx+50], [cy-50, cy+50], \\'r\\', linewidth=2)\\n plt.plot([cx-50, cx+50], [cy-50, cy-50], \\'r\\', linewidth=2)\\n plt.plot([cx-50, cx-50], [cy-50, cy+50], \\'r\\', linewidth=2)\\n plt.annotate(\\'选取的水样窗口\\', xy=(cx+50,cy-50), xytext=(cx+300, cy-300),\\n arrowprops=dict(facecolor=\\'black\\', shrink=0.1))\\n\\n plt.title(\\'水色样本 \\'+imgFile+\\' 分辨率为\\'+str(img.size)+\" 类别标签 \"+str(imgFile[9]))\\n plt.show()\\n \\nfor i in range(len(imglist)):\\n img = cv2.imread(imglist[i])\\n cut_image(img, imglist[i])\\n'"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\"\"\"\n",
"def cut_image(img, imgFile):\n",
" cx = int(np.size(img, 1))/2\n",
" cy = int(np.size(img, 0))/2\n",
" \n",
" plt.figure(figsize=(8,8))\n",
" plt.imshow(img)\n",
"\n",
" plt.plot([cx-50, cx+50], [cy+50, cy+50], 'r', linewidth=2)\n",
" plt.plot([cx+50, cx+50], [cy-50, cy+50], 'r', linewidth=2)\n",
" plt.plot([cx-50, cx+50], [cy-50, cy-50], 'r', linewidth=2)\n",
" plt.plot([cx-50, cx-50], [cy-50, cy+50], 'r', linewidth=2)\n",
" plt.annotate('选取的水样窗口', xy=(cx+50,cy-50), xytext=(cx+300, cy-300),\n",
" arrowprops=dict(facecolor='black', shrink=0.1))\n",
"\n",
" plt.title('水色样本 '+imgFile+' 分辨率为'+str(img.size)+\" 类别标签 \"+str(imgFile[9]))\n",
" plt.show()\n",
" \n",
"for i in range(len(imglist)):\n",
" img = cv2.imread(imglist[i])\n",
" cut_image(img, imglist[i])\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "65b2b8b6-ed00-47ff-84f1-e22f97d48cdb",
"metadata": {
"tags": []
},
"outputs": [],
"source": [
"# 加载图像统计信息模块(注:也可以直接通过颜色通道来计算)\n",
"from PIL import ImageStat,Image\n",
"\n",
"# 遍历全体图像进行快速检查\n",
"size = 100\n",
"imgPath = './images'\n",
"imgWidth = [] # 图像宽度\n",
"imgHeight = [] # 图像高度\n",
"imgRrange = [] # 图像红色通道极差\n",
"imgGrange = [] # 图像绿色通道极差\n",
"imgBrange = [] # 图像蓝色通道极差\n",
"\n",
"newImgs = [] # 获得选取后的图像作为模型训练和验证数据\n",
"\n",
"imgFiles = os.listdir(imgPath)\n",
"for imgFile in imgFiles:\n",
" img = Image.open(os.path.join(imgPath,imgFile))\n",
" imgWidth.append(img.size[0])\n",
" imgHeight.append(img.size[1])\n",
" \n",
" # 获得图像中心区域大小为size的图像块\n",
" cx, cy = (int(i/2) for i in img.size)\n",
" box = (cx-50, cy-50, cx+50, cy+50)\n",
" region = img.crop(box)\n",
" \n",
" # 计算选取图像块的标准差 分为红绿蓝三种\n",
" stat = ImageStat.Stat(region)\n",
" imgRrange.append(stat.extrema[0][1]-stat.extrema[0][0])\n",
" imgGrange.append(stat.extrema[1][1]-stat.extrema[1][0])\n",
" imgBrange.append(stat.extrema[2][1]-stat.extrema[2][0])\n",
" \n",
" newImgs.append(region)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "527d76c9-7201-4fe6-a835-3a329fb90bc7",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 161 entries, 0 to 160\n",
"Data columns (total 9 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 0 161 non-null float64\n",
" 1 1 161 non-null float64\n",
" 2 2 161 non-null float64\n",
" 3 3 161 non-null float64\n",
" 4 4 161 non-null float64\n",
" 5 5 161 non-null float64\n",
" 6 6 161 non-null float64\n",
" 7 7 161 non-null float64\n",
" 8 8 161 non-null float64\n",
"dtypes: float64(9)\n",
"memory usage: 11.4 KB\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" <th>6</th>\n",
" <th>7</th>\n",
" <th>8</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>148.6037</td>\n",
" <td>138.6396</td>\n",
" <td>64.3687</td>\n",
" <td>3.633077</td>\n",
" <td>4.128330</td>\n",
" <td>10.499046</td>\n",
" <td>4.254340</td>\n",
" <td>4.802914</td>\n",
" <td>12.057685</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>163.6769</td>\n",
" <td>145.5470</td>\n",
" <td>54.4608</td>\n",
" <td>3.934350</td>\n",
" <td>2.840174</td>\n",
" <td>3.502551</td>\n",
" <td>4.453499</td>\n",
" <td>3.245141</td>\n",
" <td>4.065518</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>153.9430</td>\n",
" <td>147.0834</td>\n",
" <td>71.9601</td>\n",
" <td>2.225657</td>\n",
" <td>1.810979</td>\n",
" <td>3.136608</td>\n",
" <td>2.644340</td>\n",
" <td>2.148726</td>\n",
" <td>3.651915</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>150.3724</td>\n",
" <td>151.3982</td>\n",
" <td>64.3164</td>\n",
" <td>2.037380</td>\n",
" <td>1.521590</td>\n",
" <td>2.728093</td>\n",
" <td>2.375780</td>\n",
" <td>1.819692</td>\n",
" <td>3.173392</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>150.7381</td>\n",
" <td>150.9774</td>\n",
" <td>64.6204</td>\n",
" <td>1.918674</td>\n",
" <td>1.665260</td>\n",
" <td>3.110901</td>\n",
" <td>2.259915</td>\n",
" <td>1.958815</td>\n",
" <td>3.619921</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1 2 3 4 5 6 \\\n",
"0 148.6037 138.6396 64.3687 3.633077 4.128330 10.499046 4.254340 \n",
"1 163.6769 145.5470 54.4608 3.934350 2.840174 3.502551 4.453499 \n",
"2 153.9430 147.0834 71.9601 2.225657 1.810979 3.136608 2.644340 \n",
"3 150.3724 151.3982 64.3164 2.037380 1.521590 2.728093 2.375780 \n",
"4 150.7381 150.9774 64.6204 1.918674 1.665260 3.110901 2.259915 \n",
"\n",
" 7 8 \n",
"0 4.802914 12.057685 \n",
"1 3.245141 4.065518 \n",
"2 2.148726 3.651915 \n",
"3 1.819692 3.173392 \n",
"4 1.958815 3.619921 "
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"# 构建训练数据集和分类标签\n",
"data = []\n",
"dy = []\n",
"for i, img in enumerate(newImgs):\n",
" r, g, b = np.split(np.array(img), 3, axis = 2)\n",
" \n",
" #计算一阶矩\n",
" r_m1 = np.mean(r)\n",
" g_m1 = np.mean(g)\n",
" b_m1 = np.mean(b)\n",
" \n",
" #二阶矩\n",
" r_m2 = np.std(r)\n",
" g_m2 = np.std(g)\n",
" b_m2 = np.std(b)\n",
" \n",
" #三阶矩\n",
" r_m3 = np.mean(abs(r - r.mean())**3)**(1/3)\n",
" g_m3 = np.mean(abs(g - g.mean())**3)**(1/3)\n",
" b_m3 = np.mean(abs(b - b.mean())**3)**(1/3)\n",
" \n",
" # 构造新数据集\n",
" df = np.array([r_m1,g_m1,b_m1,r_m2,g_m2,b_m2,r_m3,g_m3,b_m3])\n",
" data.append(df)\n",
" \n",
" # 保存对应的分类标签\n",
" dy.append(int(imgFiles[i][0]))\n",
"\n",
"dy = np.array(dy)\n",
"data = pd.DataFrame(np.array(data))\n",
"data.info()\n",
"data.head()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "208d30d2-1850-45eb-a68d-94e44a5f59eb",
"metadata": {
"tags": []
},
"outputs": [
{
"data": {
"text/plain": [
"((128, 9), (33, 9), (128,))"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.model_selection import train_test_split\n",
"x_train,x_test,y_train,y_test=train_test_split(data,dy,test_size=0.2,random_state=0)\n",
"x_train.shape,x_test.shape,y_train.shape"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "726c20cb-1065-4f7e-9d71-39e261178fd6",
"metadata": {
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0.3939393939393939\n"
]
}
],
"source": [
"from sklearn.metrics import accuracy_score\n",
"from sklearn.neural_network import MLPClassifier\n",
"mlp=MLPClassifier()\n",
"mlp.fit(x_train,y_train)\n",
"y_pred=mlp.predict(x_test)\n",
"print(accuracy_score(y_pred,y_test))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "51804510-c0a3-4a51-9795-0e38a7c9a8f4",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "419b9b44-bcd3-499f-8307-cb9f72ef52b3",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
File diff suppressed because one or more lines are too long
+176
View File
@@ -0,0 +1,176 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "9d091a01",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import re\n",
"import jieba\n",
"import pickle\n",
"from wordcloud import WordCloud\n",
"from sklearn.naive_bayes import GaussianNB\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer\n",
"from sklearn.metrics import confusion_matrix\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"plt.rcParams['font.sans-serif'] = 'SimHei'\n",
"plt.rcParams['axes.unicode_minus'] = False\n",
"import warnings\n",
"warnings.filterwarnings('ignore')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "eff92c98",
"metadata": {},
"outputs": [],
"source": [
"def data_process(file='message80W1.csv'):\n",
"#读取数据并进行清洗\n",
" data = pd.read_csv(file, header=None, index_col=0)\n",
" data.columns = ['label', 'message']\n",
" n = 5000\n",
"\n",
" a = data[data['label'] == 0].sample(n)\n",
" b = data[data['label'] == 1].sample(n)\n",
" data_new = pd.concat([a, b], axis=0)\n",
"\n",
" data_dup = data_new['message'].drop_duplicates()\n",
" data_qumin = data_dup.apply(lambda x: re.sub('x', '', x))\n",
"\n",
" jieba.load_userdict('newdic1.txt')\n",
" data_cut = data_qumin.apply(lambda x: jieba.lcut(x))\n",
"\n",
" stopWords = pd.read_csv('stopword.txt', encoding='GB18030', sep='hahaha', header=None)\n",
" stopWords = ['≮', '≯', '≠', '≮', ' ', '会', '月', '日', ''] + list(stopWords.iloc[:, 0])\n",
" data_after_stop = data_cut.apply(lambda x: [i for i in x if i not in stopWords])\n",
" labels = data_new.loc[data_after_stop.index, 'label']\n",
" adata = data_after_stop.apply(lambda x: ' '.join(x))\n",
" return adata, data_after_stop, labels"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c3ae80f9",
"metadata": {},
"outputs": [],
"source": [
"def plot_confusion_matrix(cm, classes, normalize=False, title='Confusion matrix', cmap=plt.cm.Blues):\n",
" sns.heatmap(cm, annot=True)\n",
" plt.ylabel('True label')\n",
" plt.xlabel('Predicted label')\n",
" plt.title(title)\n",
" plt.xticks(np.arange(len(classes)), classes)\n",
" plt.yticks(np.arange(len(classes)), classes)\n",
" plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "56dbb0d1",
"metadata": {},
"outputs": [],
"source": [
"\n",
"adata, data_after_stop, lables = data_process()\n",
"data_tr, data_te, labels_tr, labels_te = train_test_split(adata, lables, test_size=0.2)\n",
"countVectorizer = CountVectorizer()\n",
"data_tr = countVectorizer.fit_transform(data_tr)\n",
"X_tr = TfidfTransformer().fit_transform(data_tr.toarray()).toarray()\n",
"data_te = CountVectorizer(vocabulary=countVectorizer.vocabulary_).fit_transform(data_te)\n",
"X_te = TfidfTransformer().fit_transform(data_te.toarray()).toarray()\n",
"model = GaussianNB()\n",
"model.fit(X_tr, labels_tr)\n",
"pred = model.predict(X_te)\n",
"score = model.score(X_te, labels_te)\n",
"with open('model.pkl', 'wb') as f:\n",
" pickle.dump(model, f)\n",
"with open('countVectorizer.pkl', 'wb') as f:\n",
" pickle.dump(countVectorizer, f)\n",
"plot_confusion_matrix(confusion_matrix(labels_te, pred), [1, 0], title=\"模型分类准确率{:.2f}%\".format(score * 100))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cc83543e",
"metadata": {},
"outputs": [],
"source": [
"\n",
"word_fre = {}\n",
"for i in data_after_stop[lables == 0]:\n",
" for j in i:\n",
" if j not in word_fre.keys():\n",
" word_fre[j] = 1\n",
" else:\n",
" word_fre[j] += 1\n",
"\n",
"wc = WordCloud( background_color='white', font_path=r'C:/Windows/Fonts/SimHei.ttf')\n",
"wc.fit_words(word_fre)\n",
"plt.imshow(wc)\n",
"plt.show()\n",
"word_fre = {}\n",
"for i in data_after_stop[lables == 0]:\n",
" for j in i:\n",
" if j not in word_fre.keys():\n",
" word_fre[j] = 1\n",
" else:\n",
" word_fre[j] += 1\n",
"\n",
"wc = WordCloud( background_color='white', font_path=r'C:/Windows/Fonts/SimHei.ttf')\n",
"wc.fit_words(word_fre)\n",
"plt.imshow(wc)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bde6f19c",
"metadata": {},
"outputs": [],
"source": [
"0"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5f089b32",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}