Merge branch 'master' of https://gitee.com/lzhcoooode/machine_learning_projects

2023-07-13 22:04:40 +08:00
parent 733c55b78a 67476cc96e
commit e4eae978ac
25 changed files with 7165 additions and 0 deletions
@@ -0,0 +1,13 @@
+# 全部实习作业
+
+1. 在线垃圾邮件识别
+2. Fashion-MNIST分类
+3. tips.csv数据可视化
+4. 国民经济核算数据分析
+5. 水质检测
+6. 泰坦尼克号数据集分析
+7. 灰度图像人脸识别
+8. 离职数据分析
+9. 菜谱订单数据分析
+10. 鸢尾花数据集的回归与聚类
+
@@ -0,0 +1,22 @@
+# Importing Necessary Modules
+from flask import Flask, request, render_template
+from gnb_model import make_predict
+app = Flask(__name__)
+
+result = []
+@app.route("/", methods=['GET', 'POST'])
+def input():
+	global result
+	if request.method == 'POST':
+		print(request.form)
+		if "clean" in request.form.keys():
+			result = []
+		sentence = request.form['sentence']
+		if len(sentence) != 0:
+			result.append((sentence, make_predict(sentence)))
+			# result.append((sentence, "不是"))
+	return render_template("input.html", result=result)
+
+# main route to start with
+if __name__ == '__main__':
+	app.run(debug=True, host='0.0.0.0')
@@ -0,0 +1,77 @@
+import numpy as np
+import pandas as pd
+import jieba
+from pathlib import Path
+PATH_DATASET = Path.cwd().joinpath("../../dataset")
+DELETE_STOPWORDS = False
+
+df = pd.read_csv(PATH_DATASET.joinpath("message80W1.csv"), header=None)
+
+N_pos, N_neg = 10000, 10000
+df_positive = df[df[1]==0]
+df_negative = df[df[1]==1]
+np.random.seed(42)
+def sample_df(df, N):
+    indexs = np.random.choice(np.arange(len(df)), N)
+    return df.iloc[indexs,2]
+corpus_pos = sample_df(df_positive, N_pos)
+corpus_neg = sample_df(df_negative, N_neg)
+# corpus_pos = df_positive.sample(n=N_pos, random_state=42).iloc[:,2]
+# corpus_neg = df_negative.sample(n=N_neg, random_state=42).iloc[:,2]
+corpus = np.concatenate([corpus_pos, corpus_neg]).reshape(-1,1)
+y = np.concatenate([np.full(N_pos, 1), np.full(N_neg, 0)])
+
+corpus_cut = np.apply_along_axis(lambda x: ' '.join(jieba.cut(x[0])), axis=1, arr=corpus)
+
+from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
+
+with open(PATH_DATASET.joinpath("stopword.txt"), encoding='gbk') as file:
+    stopwords = file.read().split()
+
+# 频率向量化，token_pattern不取单个词作为特征的问题: https://blog.csdn.net/xxzhix/article/details/82685372
+if not DELETE_STOPWORDS:
+    vectorizer = CountVectorizer(token_pattern='[\u4e00-\u9fa5_a-zA-Z0-9]{1,}')
+else:
+    vectorizer_stopwords = CountVectorizer(token_pattern='[\u4e00-\u9fa5_a-zA-Z0-9]{1,}', stop_words=stopwords)
+X = vectorizer.fit_transform(corpus_cut)
+tfidf = TfidfTransformer()
+X = tfidf.fit_transform(X)
+
+def to_vector(X, stopwords=False):
+    X = np.array(X).reshape(-1, 1)
+    cut = np.apply_along_axis(lambda x: ' '.join(jieba.cut(x[0])), axis=1, arr=X)
+    vector = vectorizer.transform(cut)
+    return tfidf.transform(vector)
+
+from sklearn.model_selection import train_test_split
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)
+
+from sklearn.naive_bayes import GaussianNB
+from sklearn.metrics import accuracy_score
+gnb = GaussianNB()
+print("拟合中......")
+gnb.fit(X_train.toarray(), y_train)
+
+print("预测中......")
+pred_train = gnb.predict(X_train.toarray())
+pred_test = gnb.predict(X_test.toarray())
+acc_train = accuracy_score(y_train, pred_train)
+acc_test = accuracy_score(y_test, pred_test)
+print(f"准确率 train/test: {acc_train:.4f}/{acc_test:.4f}")
+
+def make_predict(string:str):
+    string = [string]
+    vector = to_vector(string).toarray()
+    ret = "是垃圾" if gnb.predict(vector) == 0 else "不是垃圾"
+    return ret
+
+if __name__ == '__main__':
+    print(make_predict("尊敬的客上，感谢您一直的支持，亿美亿康美容部特在本月的x、x、x三天举办秒杀活动，现场更是优惠多多，开抢倒计时还有两天，欲抢从速！xx号艳艳"))
+    print(make_predict("CSC喜欢打游戏"))
+    print(make_predict("一刀999"))
+    print(make_predict("你好"))
+    print(make_predict("尊敬的客上，感谢您一直的支持，亿美亿康美容部特在本月的x、x、x三天举办秒杀活动，现场更是优惠多多，开抢倒计时还有两天，欲抢从速！xx号艳艳"))
+    print(make_predict("秒杀价格8848，8848你值得拥有"))
+    print(make_predict("有博主做过同类防晒霜的对比"))
+    print(make_predict("csc每天打游戏"))
+    print(make_predict("今天电脑爆炸了"))
@@ -0,0 +1,26 @@
+<!DOCTYPE html>
+<html>
+    <head>
+        <meta charset="utf-8">
+        <title>进入</title>
+        <style>
+            body {
+                text-align: center;
+                background-color: green;
+            }
+            form {
+                display: inline-block;
+            }
+        </style>
+    </head>
+    <body>
+        <h3>There is a form.</h3>
+        <form action="/passing" method="post">
+            <p>Name <input type="text" name="name"></p>
+            <p>Email <input type="email" name="email"></p>
+            <p>Phone number <input type="text" name="phone"></p>
+            <p><input type="submit" value="Submit!!!"></p>
+        </form>
+        <h4>HA!HA!HA!HA!</h4>
+    </body>
+</html>
@@ -0,0 +1,14 @@
+<!DOCTYPE html>
+<html>
+    <head>
+        <meta charset="utf-8">
+        <title>Hello from Flask</title>
+    </head>
+    <body>
+        {% if name %}
+        <h1>Hello {{ name }}!</h1>
+        {% else %}
+        <h1>Hello, World!</h1>
+        {% endif %}
+    </body>
+</html>
@@ -0,0 +1,39 @@
+<!DOCTYPE html>
+<html>
+    <head>
+        <style>
+            body {
+                text-align: center;
+            }
+        </style>
+        <meta charset="utf-8">
+        <title>CSC识别器</title>
+    </head>
+    <body>
+        <form action="/" method="post">
+            <p>输入待识别文本：<input type="text" name="sentence" minlength="1"></p>
+            <p><button>提交</button></p>
+            <p><button name="clean">清空历史信息</button></p>
+            <!-- <p><input type="submit" name="提交"></p> -->
+        </form>
+        {% if result|length >= 1 %}
+        <table align="center" border="1">
+            <thead><tr><th colspan="2">
+                <strong>历史信息</strong>
+            </th></tr></thead>
+            <tbody>
+                <tr>
+                    <td>文本</td>
+                    <td>是/否为垃圾</td>
+                </tr>
+                {% for sentence, predict in result %}
+                <tr>
+                    <td>{{ sentence }}</td>
+                    <td>{{ predict }}</td>
+                </tr>
+                {% endfor %}
+            </tbody>
+        </table>
+        {% endif %}
+    </body>
+</html>
@@ -0,0 +1,26 @@
+<!DOCTYPE html>
+<html>
+    <head>
+        <style>
+            body {
+                text-align: center;
+                background-color: orange;
+            }
+            table {
+                display: inline-block;
+                border-collapse: collapse;
+            }
+        </style>
+    </head>
+    <body>
+        <p><strong>Your Details</strong></p>
+        <table border=1>
+            {% for key, value in result.items() %}
+            <tr>
+                <th>{{ key }}</th>
+                <th>{{ value }}</th>
+            </tr>
+            {% endfor %}
+        </table>
+    </body>
+</html>
@@ -0,0 +1,564 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "78c67687",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['images/1_1.jpg',\n",
+       " 'images/1_10.jpg',\n",
+       " 'images/1_11.jpg',\n",
+       " 'images/1_12.jpg',\n",
+       " 'images/1_13.jpg',\n",
+       " 'images/1_14.jpg',\n",
+       " 'images/1_15.jpg',\n",
+       " 'images/1_16.jpg',\n",
+       " 'images/1_17.jpg',\n",
+       " 'images/1_18.jpg',\n",
+       " 'images/1_19.jpg',\n",
+       " 'images/1_2.jpg',\n",
+       " 'images/1_20.jpg',\n",
+       " 'images/1_21.jpg',\n",
+       " 'images/1_22.jpg',\n",
+       " 'images/1_23.jpg',\n",
+       " 'images/1_24.jpg',\n",
+       " 'images/1_25.jpg',\n",
+       " 'images/1_26.jpg',\n",
+       " 'images/1_27.jpg',\n",
+       " 'images/1_28.jpg',\n",
+       " 'images/1_3.jpg',\n",
+       " 'images/1_30.jpg',\n",
+       " 'images/1_31.jpg',\n",
+       " 'images/1_32.jpg',\n",
+       " 'images/1_33.jpg',\n",
+       " 'images/1_34.jpg',\n",
+       " 'images/1_35.jpg',\n",
+       " 'images/1_36.jpg',\n",
+       " 'images/1_37.jpg',\n",
+       " 'images/1_38.jpg',\n",
+       " 'images/1_39.jpg',\n",
+       " 'images/1_4.jpg',\n",
+       " 'images/1_40.jpg',\n",
+       " 'images/1_41.jpg',\n",
+       " 'images/1_42.jpg',\n",
+       " 'images/1_43.jpg',\n",
+       " 'images/1_44.jpg',\n",
+       " 'images/1_45.jpg',\n",
+       " 'images/1_46.jpg',\n",
+       " 'images/1_47.jpg',\n",
+       " 'images/1_48.jpg',\n",
+       " 'images/1_49.jpg',\n",
+       " 'images/1_5.jpg',\n",
+       " 'images/1_51.jpg',\n",
+       " 'images/1_6.jpg',\n",
+       " 'images/1_7.jpg',\n",
+       " 'images/1_8.jpg',\n",
+       " 'images/1_9.jpg',\n",
+       " 'images/2_1.jpg',\n",
+       " 'images/2_10.jpg',\n",
+       " 'images/2_12.jpg',\n",
+       " 'images/2_13.jpg',\n",
+       " 'images/2_14.jpg',\n",
+       " 'images/2_15.jpg',\n",
+       " 'images/2_17.jpg',\n",
+       " 'images/2_18.jpg',\n",
+       " 'images/2_19.jpg',\n",
+       " 'images/2_2.jpg',\n",
+       " 'images/2_20.jpg',\n",
+       " 'images/2_21.jpg',\n",
+       " 'images/2_22.jpg',\n",
+       " 'images/2_23.jpg',\n",
+       " 'images/2_24.jpg',\n",
+       " 'images/2_26.jpg',\n",
+       " 'images/2_27.jpg',\n",
+       " 'images/2_28.jpg',\n",
+       " 'images/2_29.jpg',\n",
+       " 'images/2_3.jpg',\n",
+       " 'images/2_30.jpg',\n",
+       " 'images/2_31.jpg',\n",
+       " 'images/2_32.jpg',\n",
+       " 'images/2_33.jpg',\n",
+       " 'images/2_34.jpg',\n",
+       " 'images/2_35.jpg',\n",
+       " 'images/2_36.jpg',\n",
+       " 'images/2_37.jpg',\n",
+       " 'images/2_38.jpg',\n",
+       " 'images/2_39.jpg',\n",
+       " 'images/2_4.jpg',\n",
+       " 'images/2_40.jpg',\n",
+       " 'images/2_41.jpg',\n",
+       " 'images/2_42.jpg',\n",
+       " 'images/2_43.jpg',\n",
+       " 'images/2_44.jpg',\n",
+       " 'images/2_5.jpg',\n",
+       " 'images/2_6.jpg',\n",
+       " 'images/2_7.jpg',\n",
+       " 'images/2_8.jpg',\n",
+       " 'images/2_9.jpg',\n",
+       " 'images/3_10.jpg',\n",
+       " 'images/3_11.jpg',\n",
+       " 'images/3_12.jpg',\n",
+       " 'images/3_13.jpg',\n",
+       " 'images/3_14.jpg',\n",
+       " 'images/3_15.jpg',\n",
+       " 'images/3_16.jpg',\n",
+       " 'images/3_17.jpg',\n",
+       " 'images/3_18.jpg',\n",
+       " 'images/3_19.jpg',\n",
+       " 'images/3_20.jpg',\n",
+       " 'images/3_21.jpg',\n",
+       " 'images/3_22.jpg',\n",
+       " 'images/3_23.jpg',\n",
+       " 'images/3_25.jpg',\n",
+       " 'images/3_26.jpg',\n",
+       " 'images/3_27.jpg',\n",
+       " 'images/3_28.jpg',\n",
+       " 'images/3_29.jpg',\n",
+       " 'images/3_3.jpg',\n",
+       " 'images/3_30.jpg',\n",
+       " 'images/3_31.jpg',\n",
+       " 'images/3_32.jpg',\n",
+       " 'images/3_34.jpg',\n",
+       " 'images/3_38.jpg',\n",
+       " 'images/3_40.jpg',\n",
+       " 'images/3_42.jpg',\n",
+       " 'images/3_43.jpg',\n",
+       " 'images/3_44.jpg',\n",
+       " 'images/3_45.jpg',\n",
+       " 'images/3_46.jpg',\n",
+       " 'images/3_47.jpg',\n",
+       " 'images/3_48.jpg',\n",
+       " 'images/3_49.jpg',\n",
+       " 'images/3_5.jpg',\n",
+       " 'images/3_50.jpg',\n",
+       " 'images/3_51.jpg',\n",
+       " 'images/3_52.jpg',\n",
+       " 'images/3_53.jpg',\n",
+       " 'images/3_55.jpg',\n",
+       " 'images/3_56.jpg',\n",
+       " 'images/3_57.jpg',\n",
+       " 'images/3_58.jpg',\n",
+       " 'images/3_59.jpg',\n",
+       " 'images/3_65.jpg',\n",
+       " 'images/3_66.jpg',\n",
+       " 'images/3_7.jpg',\n",
+       " 'images/3_71.jpg',\n",
+       " 'images/3_72.jpg',\n",
+       " 'images/3_73.jpg',\n",
+       " 'images/3_74.jpg',\n",
+       " 'images/3_75.jpg',\n",
+       " 'images/3_76.jpg',\n",
+       " 'images/3_77.jpg',\n",
+       " 'images/3_78.jpg',\n",
+       " 'images/3_8.jpg',\n",
+       " 'images/3_9.jpg',\n",
+       " 'images/4_10.jpg',\n",
+       " 'images/4_11.jpg',\n",
+       " 'images/4_13.jpg',\n",
+       " 'images/4_14.jpg',\n",
+       " 'images/4_21.jpg',\n",
+       " 'images/4_6.jpg',\n",
+       " 'images/4_7.jpg',\n",
+       " 'images/4_9.jpg',\n",
+       " 'images/5_1.jpg',\n",
+       " 'images/5_2.jpg',\n",
+       " 'images/5_3.jpg',\n",
+       " 'images/5_4.jpg',\n",
+       " 'images/5_5.jpg',\n",
+       " 'images/5_6.jpg']"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import re\n",
+    "import numpy as np\n",
+    "import os\n",
+    "import cv2\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "plt.rcParams['font.sans-serif'] = ['SimHei']\n",
+    "plt.rcParams['axes.unicode_minus'] = False\n",
+    "\n",
+    "def getimgnames(path=None):\n",
+    "    \"\"\"\n",
+    "    获取指定文件夹中的JPG图片名称（含路径）\n",
+    "    :param path: 指定文件夹\n",
+    "    :return: path中的所有JPG图片名称（含路径，例如：./path/image1.jpg）\n",
+    "    \"\"\"\n",
+    "    imgnames = []\n",
+    "    filenames = os.listdir(path)  # 获取path中的所有文件名\n",
+    "    for i in filenames:\n",
+    "        if re.findall('\\.jpg$', i) != []:  # 在所有文件名中找出JPG图片名称\n",
+    "            imgnames.append(os.path.join(path, i))  # 将图片名称和路径合并、保存\n",
+    "    return imgnames\n",
+    "\n",
+    "imglist = getimgnames('images/')\n",
+    "imglist"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "356b558c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'\\ndef cut_image(img, imgFile):\\n    cx = int(np.size(img, 1))/2\\n    cy = int(np.size(img, 0))/2\\n    \\n    plt.figure(figsize=(8,8))\\n    plt.imshow(img)\\n\\n    plt.plot([cx-50, cx+50], [cy+50, cy+50], \\'r\\', linewidth=2)\\n    plt.plot([cx+50, cx+50], [cy-50, cy+50], \\'r\\', linewidth=2)\\n    plt.plot([cx-50, cx+50], [cy-50, cy-50], \\'r\\', linewidth=2)\\n    plt.plot([cx-50, cx-50], [cy-50, cy+50], \\'r\\', linewidth=2)\\n    plt.annotate(\\'选取的水样窗口\\', xy=(cx+50,cy-50), xytext=(cx+300, cy-300),\\n            arrowprops=dict(facecolor=\\'black\\', shrink=0.1))\\n\\n    plt.title(\\'水色样本 \\'+imgFile+\\' 分辨率为\\'+str(img.size)+\" 类别标签 \"+str(imgFile[9]))\\n    plt.show()\\n    \\nfor i in range(len(imglist)):\\n    img = cv2.imread(imglist[i])\\n    cut_image(img, imglist[i])\\n'"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "\"\"\"\n",
+    "def cut_image(img, imgFile):\n",
+    "    cx = int(np.size(img, 1))/2\n",
+    "    cy = int(np.size(img, 0))/2\n",
+    "    \n",
+    "    plt.figure(figsize=(8,8))\n",
+    "    plt.imshow(img)\n",
+    "\n",
+    "    plt.plot([cx-50, cx+50], [cy+50, cy+50], 'r', linewidth=2)\n",
+    "    plt.plot([cx+50, cx+50], [cy-50, cy+50], 'r', linewidth=2)\n",
+    "    plt.plot([cx-50, cx+50], [cy-50, cy-50], 'r', linewidth=2)\n",
+    "    plt.plot([cx-50, cx-50], [cy-50, cy+50], 'r', linewidth=2)\n",
+    "    plt.annotate('选取的水样窗口', xy=(cx+50,cy-50), xytext=(cx+300, cy-300),\n",
+    "            arrowprops=dict(facecolor='black', shrink=0.1))\n",
+    "\n",
+    "    plt.title('水色样本 '+imgFile+' 分辨率为'+str(img.size)+\" 类别标签 \"+str(imgFile[9]))\n",
+    "    plt.show()\n",
+    "    \n",
+    "for i in range(len(imglist)):\n",
+    "    img = cv2.imread(imglist[i])\n",
+    "    cut_image(img, imglist[i])\n",
+    "\"\"\"\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "85e7fe5e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 加载图像统计信息模块（注：也可以直接通过颜色通道来计算）\n",
+    "from PIL import ImageStat,Image\n",
+    "\n",
+    "# 遍历全体图像进行快速检查\n",
+    "size = 100\n",
+    "imgPath = './images'\n",
+    "imgWidth = []       # 图像宽度\n",
+    "imgHeight = []      # 图像高度\n",
+    "imgRrange = []      # 图像红色通道极差\n",
+    "imgGrange = []      # 图像绿色通道极差\n",
+    "imgBrange = []      # 图像蓝色通道极差\n",
+    "\n",
+    "newImgs = []        # 获得选取后的图像作为模型训练和验证数据\n",
+    "\n",
+    "imgFiles = os.listdir(imgPath)\n",
+    "for imgFile in imgFiles:\n",
+    "    img = Image.open(os.path.join(imgPath,imgFile))\n",
+    "    imgWidth.append(img.size[0])\n",
+    "    imgHeight.append(img.size[1])\n",
+    "    \n",
+    "    # 获得图像中心区域大小为size的图像块\n",
+    "    cx, cy = (int(i/2) for i in img.size)\n",
+    "    box = (cx-50, cy-50, cx+50, cy+50)\n",
+    "    region = img.crop(box)\n",
+    "    \n",
+    "    # Compute the per-channel range (max - min) of the cropped patch for R, G and B\n",
+    "    stat = ImageStat.Stat(region)\n",
+    "    imgRrange.append(stat.extrema[0][1]-stat.extrema[0][0])\n",
+    "    imgGrange.append(stat.extrema[1][1]-stat.extrema[1][0])\n",
+    "    imgBrange.append(stat.extrema[2][1]-stat.extrema[2][0])\n",
+    "    \n",
+    "    newImgs.append(region)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "b5ad4f91",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "RangeIndex: 161 entries, 0 to 160\n",
+      "Data columns (total 9 columns):\n",
+      " #   Column  Non-Null Count  Dtype  \n",
+      "---  ------  --------------  -----  \n",
+      " 0   0       161 non-null    float64\n",
+      " 1   1       161 non-null    float64\n",
+      " 2   2       161 non-null    float64\n",
+      " 3   3       161 non-null    float64\n",
+      " 4   4       161 non-null    float64\n",
+      " 5   5       161 non-null    float64\n",
+      " 6   6       161 non-null    float64\n",
+      " 7   7       161 non-null    float64\n",
+      " 8   8       161 non-null    float64\n",
+      "dtypes: float64(9)\n",
+      "memory usage: 11.4 KB\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>0</th>\n",
+       "      <th>1</th>\n",
+       "      <th>2</th>\n",
+       "      <th>3</th>\n",
+       "      <th>4</th>\n",
+       "      <th>5</th>\n",
+       "      <th>6</th>\n",
+       "      <th>7</th>\n",
+       "      <th>8</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>148.6041</td>\n",
+       "      <td>138.6381</td>\n",
+       "      <td>64.3694</td>\n",
+       "      <td>3.625902</td>\n",
+       "      <td>4.125497</td>\n",
+       "      <td>10.485931</td>\n",
+       "      <td>4.242425</td>\n",
+       "      <td>4.798916</td>\n",
+       "      <td>12.044228</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>163.6788</td>\n",
+       "      <td>145.5487</td>\n",
+       "      <td>54.4581</td>\n",
+       "      <td>3.923370</td>\n",
+       "      <td>2.835001</td>\n",
+       "      <td>3.478972</td>\n",
+       "      <td>4.439465</td>\n",
+       "      <td>3.239916</td>\n",
+       "      <td>4.039823</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>153.9485</td>\n",
+       "      <td>147.0810</td>\n",
+       "      <td>71.9576</td>\n",
+       "      <td>2.208766</td>\n",
+       "      <td>1.803397</td>\n",
+       "      <td>3.115478</td>\n",
+       "      <td>2.623796</td>\n",
+       "      <td>2.139289</td>\n",
+       "      <td>3.621357</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>150.3755</td>\n",
+       "      <td>151.3985</td>\n",
+       "      <td>64.3118</td>\n",
+       "      <td>2.015167</td>\n",
+       "      <td>1.514034</td>\n",
+       "      <td>2.698922</td>\n",
+       "      <td>2.344658</td>\n",
+       "      <td>1.809065</td>\n",
+       "      <td>3.136727</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>150.7380</td>\n",
+       "      <td>150.9738</td>\n",
+       "      <td>64.6246</td>\n",
+       "      <td>1.902934</td>\n",
+       "      <td>1.658045</td>\n",
+       "      <td>3.098044</td>\n",
+       "      <td>2.242270</td>\n",
+       "      <td>1.952067</td>\n",
+       "      <td>3.593836</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "          0         1        2         3         4          5         6  \\\n",
+       "0  148.6041  138.6381  64.3694  3.625902  4.125497  10.485931  4.242425   \n",
+       "1  163.6788  145.5487  54.4581  3.923370  2.835001   3.478972  4.439465   \n",
+       "2  153.9485  147.0810  71.9576  2.208766  1.803397   3.115478  2.623796   \n",
+       "3  150.3755  151.3985  64.3118  2.015167  1.514034   2.698922  2.344658   \n",
+       "4  150.7380  150.9738  64.6246  1.902934  1.658045   3.098044  2.242270   \n",
+       "\n",
+       "          7          8  \n",
+       "0  4.798916  12.044228  \n",
+       "1  3.239916   4.039823  \n",
+       "2  2.139289   3.621357  \n",
+       "3  1.809065   3.136727  \n",
+       "4  1.952067   3.593836  "
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "# 构建训练数据集和分类标签\n",
+    "data = []\n",
+    "dy = []\n",
+    "for i, img in enumerate(newImgs):\n",
+    "    r, g, b = np.split(np.array(img), 3, axis = 2)\n",
+    "    \n",
+    "    #计算一阶矩\n",
+    "    r_m1 = np.mean(r)\n",
+    "    g_m1 = np.mean(g)\n",
+    "    b_m1 = np.mean(b)\n",
+    "    \n",
+    "    #二阶矩\n",
+    "    r_m2 = np.std(r)\n",
+    "    g_m2 = np.std(g)\n",
+    "    b_m2 = np.std(b)\n",
+    "    \n",
+    "    #三阶矩\n",
+    "    r_m3 = np.mean(abs(r - r.mean())**3)**(1/3)\n",
+    "    g_m3 = np.mean(abs(g - g.mean())**3)**(1/3)\n",
+    "    b_m3 = np.mean(abs(b - b.mean())**3)**(1/3)\n",
+    "    \n",
+    "    # 构造新数据集\n",
+    "    df = np.array([r_m1,g_m1,b_m1,r_m2,g_m2,b_m2,r_m3,g_m3,b_m3])\n",
+    "    data.append(df)\n",
+    "    \n",
+    "    # 保存对应的分类标签\n",
+    "    dy.append(int(imgFiles[i][0]))\n",
+    "\n",
+    "dy = np.array(dy)\n",
+    "data = pd.DataFrame(np.array(data))\n",
+    "data.info()\n",
+    "data.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "da56ab61",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "D:\\anaconda\\lib\\site-packages\\scipy\\__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.24.3\n",
+      "  warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "((128, 9), (33, 9), (128,))"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from sklearn.model_selection import train_test_split\n",
+    "x_train,x_test,y_train,y_test=train_test_split(data,dy,test_size=0.2,random_state=0)\n",
+    "x_train.shape,x_test.shape,y_train.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "8c6be264",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.7575757575757576\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "D:\\anaconda\\lib\\site-packages\\sklearn\\neural_network\\_multilayer_perceptron.py:692: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.\n",
+      "  warnings.warn(\n"
+     ]
+    }
+   ],
+   "source": [
+    "from sklearn.metrics import accuracy_score\n",
+    "from sklearn.neural_network import MLPClassifier\n",
+    "mlp=MLPClassifier()\n",
+    "mlp.fit(x_train,y_train)\n",
+    "y_pred=mlp.predict(x_test)\n",
+    "print(accuracy_score(y_pred,y_test))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d7c33fc8",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}