Merge branch 'master' of https://gitee.com/lzhcoooode/machine_learning_projects
This commit is contained in:
File diff suppressed because one or more lines are too long
@@ -0,0 +1,13 @@
|
||||
# 全部实习作业
|
||||
|
||||
1. 在线垃圾邮件识别
|
||||
2. Fashion-MNIST分类
|
||||
3. tips.csv数据可视化
|
||||
4. 国民经济核算数据分析
|
||||
5. 水质检测
|
||||
6. 泰坦尼克号数据集分析
|
||||
7. 灰度图像人脸识别
|
||||
8. 离职数据分析
|
||||
9. 菜谱订单数据分析
|
||||
10. 鸢尾花数据集的回归与聚类
|
||||
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,22 @@
|
||||
# Importing Necessary Modules
|
||||
from flask import Flask, request, render_template
|
||||
from gnb_model import make_predict
|
||||
app = Flask(__name__)
|
||||
|
||||
result = []
|
||||
@app.route("/", methods=['GET', 'POST'])
def input():
    """Render the classifier page and, on POST, classify the submitted text.

    On POST the form may carry a ``clean`` key (sent by the "clear history"
    button), which empties the module-level ``result`` history, and a
    ``sentence`` field. A non-empty sentence is classified with
    ``make_predict`` and appended to ``result`` as ``(sentence, label)``.
    The history is then rendered through the ``input.html`` template.

    NOTE: the view keeps its original name (it shadows the ``input`` builtin)
    because Flask uses the function name as the endpoint name by default.
    NOTE(review): ``result`` is module-level state, so the history is shared
    by every client of this process.
    """
    if request.method == 'POST':
        app.logger.debug("form data: %s", request.form)
        if "clean" in request.form:
            # Clear in place so the module-level list object stays the same.
            result.clear()
        # .get() instead of indexing: a POST without a 'sentence' field would
        # otherwise abort with 400 Bad Request instead of re-rendering the page.
        sentence = request.form.get('sentence', '')
        if sentence:
            result.append((sentence, make_predict(sentence)))
    return render_template("input.html", result=result)
|
||||
|
||||
# Script entry point: start the Flask development server.
if __name__ == '__main__':
    import os

    # The Werkzeug debugger allows arbitrary code execution from the browser,
    # so debug mode is opt-in (FLASK_DEBUG=1) and, when enabled, the server is
    # bound to the loopback interface only instead of every interface.
    debug = os.environ.get("FLASK_DEBUG", "0") == "1"
    host = '127.0.0.1' if debug else '0.0.0.0'
    app.run(debug=debug, host=host)
|
||||
@@ -0,0 +1,77 @@
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import jieba
|
||||
from pathlib import Path
|
||||
PATH_DATASET = Path.cwd().joinpath("../../dataset")
|
||||
DELETE_STOPWORDS = False
|
||||
|
||||
df = pd.read_csv(PATH_DATASET.joinpath("message80W1.csv"), header=None)
|
||||
|
||||
N_pos, N_neg = 10000, 10000
|
||||
df_positive = df[df[1]==0]
|
||||
df_negative = df[df[1]==1]
|
||||
np.random.seed(42)
|
||||
def sample_df(df, N):
    """Draw ``N`` random rows from ``df`` and return their column at position 2.

    Rows are drawn without replacement whenever ``df`` has at least ``N``
    rows, so the sample contains no repeated row (repeated rows could end up
    in both the training and the test split later on). When ``N`` exceeds
    ``len(df)`` the draw falls back to sampling with replacement so that the
    call still returns ``N`` rows.

    Args:
        df: DataFrame to sample from.
        N: Number of rows to draw.

    Returns:
        ``df.iloc[positions, 2]`` for the drawn row positions. Positions come
        from NumPy's global random generator.
    """
    n_rows = len(df)
    # Only sample with replacement when there are too few rows to avoid it.
    positions = np.random.choice(n_rows, size=N, replace=N > n_rows)
    return df.iloc[positions, 2]
|
||||
corpus_pos = sample_df(df_positive, N_pos)
|
||||
corpus_neg = sample_df(df_negative, N_neg)
|
||||
# corpus_pos = df_positive.sample(n=N_pos, random_state=42).iloc[:,2]
|
||||
# corpus_neg = df_negative.sample(n=N_neg, random_state=42).iloc[:,2]
|
||||
corpus = np.concatenate([corpus_pos, corpus_neg]).reshape(-1,1)
|
||||
y = np.concatenate([np.full(N_pos, 1), np.full(N_neg, 0)])
|
||||
|
||||
# Tokenize each message with jieba and re-join the tokens with spaces.
# A list comprehension is used rather than np.apply_along_axis because the
# latter allocates its output with the dtype of the first result, which cuts
# every later string down to the first string's length.
corpus_cut = np.array([' '.join(jieba.cut(text)) for text in corpus[:, 0]])
|
||||
|
||||
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
|
||||
|
||||
with open(PATH_DATASET.joinpath("stopword.txt"), encoding='gbk') as file:
|
||||
stopwords = file.read().split()
|
||||
|
||||
# Count vectorization. On token_pattern and single-character words not being
# taken as features, see: https://blog.csdn.net/xxzhix/article/details/82685372
if not DELETE_STOPWORDS:
    vectorizer = CountVectorizer(token_pattern='[\u4e00-\u9fa5_a-zA-Z0-9]{1,}')
else:
    vectorizer_stopwords = CountVectorizer(token_pattern='[\u4e00-\u9fa5_a-zA-Z0-9]{1,}', stop_words=stopwords)
    # The rest of the module refers to the name `vectorizer`; bind it in this
    # branch as well so enabling DELETE_STOPWORDS does not raise NameError.
    vectorizer = vectorizer_stopwords
|
||||
X = vectorizer.fit_transform(corpus_cut)
|
||||
tfidf = TfidfTransformer()
|
||||
X = tfidf.fit_transform(X)
|
||||
|
||||
def to_vector(X, stopwords=False):
    """Convert raw text into TF-IDF features using the fitted module-level models.

    Args:
        X: A string or a sequence of strings; it is flattened so that every
            element is treated as one document.
        stopwords: Not used by this function; kept so existing calls that
            pass it continue to work.

    Returns:
        The result of ``tfidf.transform`` applied to the count vectors that
        ``vectorizer.transform`` produces for the jieba-tokenized documents.
    """
    documents = np.array(X).reshape(-1)
    # Tokenize with a list comprehension instead of np.apply_along_axis: the
    # latter sizes its output dtype from the first result and would truncate
    # any later document that tokenizes to a longer string.
    cut = [' '.join(jieba.cut(doc)) for doc in documents]
    vector = vectorizer.transform(cut)
    return tfidf.transform(vector)
|
||||
|
||||
from sklearn.model_selection import train_test_split
|
||||
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)
|
||||
|
||||
from sklearn.naive_bayes import GaussianNB
|
||||
from sklearn.metrics import accuracy_score
|
||||
# Fit a Gaussian naive Bayes classifier on the training split. The sparse
# feature matrices are converted to dense arrays at each call site.
gnb = GaussianNB()
print("拟合中......")
gnb.fit(X_train.toarray(), y_train)

# Predict on the training split first, then on the test split, and report
# the accuracy of both.
print("预测中......")
pred_train, pred_test = (gnb.predict(split.toarray()) for split in (X_train, X_test))
acc_train, acc_test = accuracy_score(y_train, pred_train), accuracy_score(y_test, pred_test)
print(f"准确率 train/test: {acc_train:.4f}/{acc_test:.4f}")
|
||||
|
||||
def make_predict(string: str):
    """Classify one piece of text with the fitted ``gnb`` model.

    The text is wrapped in a single-element list, vectorized with
    ``to_vector`` and converted to a dense array before prediction.

    Args:
        string: The text to classify.

    Returns:
        "是垃圾" when the model predicts class 0, otherwise "不是垃圾".
    """
    features = to_vector([string]).toarray()
    if gnb.predict(features) == 0:
        return "是垃圾"
    return "不是垃圾"
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: print the predicted label for a handful of sample
    # sentences, in this order (the first sample is intentionally repeated).
    demo_sentences = (
        "尊敬的客上,感谢您一直的支持,亿美亿康美容部特在本月的x、x、x三天举办秒杀活动,现场更是优惠多多,开抢倒计时还有两天,欲抢从速!xx号艳艳",
        "CSC喜欢打游戏",
        "一刀999",
        "你好",
        "尊敬的客上,感谢您一直的支持,亿美亿康美容部特在本月的x、x、x三天举办秒杀活动,现场更是优惠多多,开抢倒计时还有两天,欲抢从速!xx号艳艳",
        "秒杀价格8848,8848你值得拥有",
        "有博主做过同类防晒霜的对比",
        "csc每天打游戏",
        "今天电脑爆炸了",
    )
    for sentence in demo_sentences:
        print(make_predict(sentence))
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 178 KiB |
@@ -0,0 +1,26 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>进入</title>
|
||||
<style>
|
||||
body {
|
||||
text-align: center;
|
||||
background-color: green;
|
||||
}
|
||||
form {
|
||||
display: inline-block;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<h3>There is a form.</h3>
|
||||
<form action="/passing" method="post">
|
||||
<p>Name <input type="text" name="name"></p>
|
||||
<p>Email <input type="email" name="email"></p>
|
||||
<p>Phone number <input type="text" name="phone"></p>
|
||||
<p><input type="submit" value="Submit!!!"></p>
|
||||
</form>
|
||||
<h4>HA!HA!HA!HA!</h4>
|
||||
</body>
|
||||
</html>
|
||||
@@ -0,0 +1,14 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<title>Hello from Flask</title>
|
||||
</head>
|
||||
<body>
|
||||
{% if name %}
|
||||
<h1>Hello {{ name }}!</h1>
|
||||
{% else %}
|
||||
<h1>Hello, World!</h1>
|
||||
{% endif %}
|
||||
</body>
|
||||
</html>
|
||||
@@ -0,0 +1,39 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<header>
|
||||
<style>
|
||||
body {
|
||||
text-align: center;
|
||||
}
|
||||
</style>
|
||||
<meta charset="utf-8">
|
||||
<title>CSC识别器</title>
|
||||
</header>
|
||||
<body>
|
||||
<form action="/" method="post">
|
||||
<p>输入待识别文本:<input type="text" name="sentence" minlength="1"></p>
|
||||
<p><button>提交</button></p>
|
||||
<p><button name="clean">清空历史信息</button></p>
|
||||
<!-- <p><input type="submit" name="提交"></p> -->
|
||||
</form>
|
||||
{% if result|length >= 1 %}
|
||||
<table align="center" border="1">
|
||||
<thead><tr><th colspan="2">
|
||||
<strong>历史信息</strong>
|
||||
</th></tr></thead>
|
||||
<tbody>
|
||||
<tr>
|
||||
<td>文本</td>
|
||||
<td>是/否为垃圾</td>
|
||||
</tr>
|
||||
{% for sentence, predict in result %}
|
||||
<tr>
|
||||
<td>{{ sentence }}</td>
|
||||
<td>{{ predict }}</td>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</tbody>
|
||||
</table>
|
||||
{% endif %}
|
||||
</body>
|
||||
</html>
|
||||
@@ -0,0 +1,26 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<style>
|
||||
body {
|
||||
text-align: center;
|
||||
background-color: orange;
|
||||
}
|
||||
table {
|
||||
display: inline-block;
|
||||
border-collapse: collapse;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<p><strong>Your Details</strong></p>
|
||||
<table border=1>
|
||||
{% for key, value in result.items() %}
|
||||
<tr>
|
||||
<th>{{ key }}</th>
|
||||
<th>{{ value }}</th>
|
||||
</tr>
|
||||
{% endfor %}
|
||||
</table>
|
||||
</body>
|
||||
</html>
|
||||
@@ -0,0 +1,564 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "78c67687",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['images/1_1.jpg',\n",
|
||||
" 'images/1_10.jpg',\n",
|
||||
" 'images/1_11.jpg',\n",
|
||||
" 'images/1_12.jpg',\n",
|
||||
" 'images/1_13.jpg',\n",
|
||||
" 'images/1_14.jpg',\n",
|
||||
" 'images/1_15.jpg',\n",
|
||||
" 'images/1_16.jpg',\n",
|
||||
" 'images/1_17.jpg',\n",
|
||||
" 'images/1_18.jpg',\n",
|
||||
" 'images/1_19.jpg',\n",
|
||||
" 'images/1_2.jpg',\n",
|
||||
" 'images/1_20.jpg',\n",
|
||||
" 'images/1_21.jpg',\n",
|
||||
" 'images/1_22.jpg',\n",
|
||||
" 'images/1_23.jpg',\n",
|
||||
" 'images/1_24.jpg',\n",
|
||||
" 'images/1_25.jpg',\n",
|
||||
" 'images/1_26.jpg',\n",
|
||||
" 'images/1_27.jpg',\n",
|
||||
" 'images/1_28.jpg',\n",
|
||||
" 'images/1_3.jpg',\n",
|
||||
" 'images/1_30.jpg',\n",
|
||||
" 'images/1_31.jpg',\n",
|
||||
" 'images/1_32.jpg',\n",
|
||||
" 'images/1_33.jpg',\n",
|
||||
" 'images/1_34.jpg',\n",
|
||||
" 'images/1_35.jpg',\n",
|
||||
" 'images/1_36.jpg',\n",
|
||||
" 'images/1_37.jpg',\n",
|
||||
" 'images/1_38.jpg',\n",
|
||||
" 'images/1_39.jpg',\n",
|
||||
" 'images/1_4.jpg',\n",
|
||||
" 'images/1_40.jpg',\n",
|
||||
" 'images/1_41.jpg',\n",
|
||||
" 'images/1_42.jpg',\n",
|
||||
" 'images/1_43.jpg',\n",
|
||||
" 'images/1_44.jpg',\n",
|
||||
" 'images/1_45.jpg',\n",
|
||||
" 'images/1_46.jpg',\n",
|
||||
" 'images/1_47.jpg',\n",
|
||||
" 'images/1_48.jpg',\n",
|
||||
" 'images/1_49.jpg',\n",
|
||||
" 'images/1_5.jpg',\n",
|
||||
" 'images/1_51.jpg',\n",
|
||||
" 'images/1_6.jpg',\n",
|
||||
" 'images/1_7.jpg',\n",
|
||||
" 'images/1_8.jpg',\n",
|
||||
" 'images/1_9.jpg',\n",
|
||||
" 'images/2_1.jpg',\n",
|
||||
" 'images/2_10.jpg',\n",
|
||||
" 'images/2_12.jpg',\n",
|
||||
" 'images/2_13.jpg',\n",
|
||||
" 'images/2_14.jpg',\n",
|
||||
" 'images/2_15.jpg',\n",
|
||||
" 'images/2_17.jpg',\n",
|
||||
" 'images/2_18.jpg',\n",
|
||||
" 'images/2_19.jpg',\n",
|
||||
" 'images/2_2.jpg',\n",
|
||||
" 'images/2_20.jpg',\n",
|
||||
" 'images/2_21.jpg',\n",
|
||||
" 'images/2_22.jpg',\n",
|
||||
" 'images/2_23.jpg',\n",
|
||||
" 'images/2_24.jpg',\n",
|
||||
" 'images/2_26.jpg',\n",
|
||||
" 'images/2_27.jpg',\n",
|
||||
" 'images/2_28.jpg',\n",
|
||||
" 'images/2_29.jpg',\n",
|
||||
" 'images/2_3.jpg',\n",
|
||||
" 'images/2_30.jpg',\n",
|
||||
" 'images/2_31.jpg',\n",
|
||||
" 'images/2_32.jpg',\n",
|
||||
" 'images/2_33.jpg',\n",
|
||||
" 'images/2_34.jpg',\n",
|
||||
" 'images/2_35.jpg',\n",
|
||||
" 'images/2_36.jpg',\n",
|
||||
" 'images/2_37.jpg',\n",
|
||||
" 'images/2_38.jpg',\n",
|
||||
" 'images/2_39.jpg',\n",
|
||||
" 'images/2_4.jpg',\n",
|
||||
" 'images/2_40.jpg',\n",
|
||||
" 'images/2_41.jpg',\n",
|
||||
" 'images/2_42.jpg',\n",
|
||||
" 'images/2_43.jpg',\n",
|
||||
" 'images/2_44.jpg',\n",
|
||||
" 'images/2_5.jpg',\n",
|
||||
" 'images/2_6.jpg',\n",
|
||||
" 'images/2_7.jpg',\n",
|
||||
" 'images/2_8.jpg',\n",
|
||||
" 'images/2_9.jpg',\n",
|
||||
" 'images/3_10.jpg',\n",
|
||||
" 'images/3_11.jpg',\n",
|
||||
" 'images/3_12.jpg',\n",
|
||||
" 'images/3_13.jpg',\n",
|
||||
" 'images/3_14.jpg',\n",
|
||||
" 'images/3_15.jpg',\n",
|
||||
" 'images/3_16.jpg',\n",
|
||||
" 'images/3_17.jpg',\n",
|
||||
" 'images/3_18.jpg',\n",
|
||||
" 'images/3_19.jpg',\n",
|
||||
" 'images/3_20.jpg',\n",
|
||||
" 'images/3_21.jpg',\n",
|
||||
" 'images/3_22.jpg',\n",
|
||||
" 'images/3_23.jpg',\n",
|
||||
" 'images/3_25.jpg',\n",
|
||||
" 'images/3_26.jpg',\n",
|
||||
" 'images/3_27.jpg',\n",
|
||||
" 'images/3_28.jpg',\n",
|
||||
" 'images/3_29.jpg',\n",
|
||||
" 'images/3_3.jpg',\n",
|
||||
" 'images/3_30.jpg',\n",
|
||||
" 'images/3_31.jpg',\n",
|
||||
" 'images/3_32.jpg',\n",
|
||||
" 'images/3_34.jpg',\n",
|
||||
" 'images/3_38.jpg',\n",
|
||||
" 'images/3_40.jpg',\n",
|
||||
" 'images/3_42.jpg',\n",
|
||||
" 'images/3_43.jpg',\n",
|
||||
" 'images/3_44.jpg',\n",
|
||||
" 'images/3_45.jpg',\n",
|
||||
" 'images/3_46.jpg',\n",
|
||||
" 'images/3_47.jpg',\n",
|
||||
" 'images/3_48.jpg',\n",
|
||||
" 'images/3_49.jpg',\n",
|
||||
" 'images/3_5.jpg',\n",
|
||||
" 'images/3_50.jpg',\n",
|
||||
" 'images/3_51.jpg',\n",
|
||||
" 'images/3_52.jpg',\n",
|
||||
" 'images/3_53.jpg',\n",
|
||||
" 'images/3_55.jpg',\n",
|
||||
" 'images/3_56.jpg',\n",
|
||||
" 'images/3_57.jpg',\n",
|
||||
" 'images/3_58.jpg',\n",
|
||||
" 'images/3_59.jpg',\n",
|
||||
" 'images/3_65.jpg',\n",
|
||||
" 'images/3_66.jpg',\n",
|
||||
" 'images/3_7.jpg',\n",
|
||||
" 'images/3_71.jpg',\n",
|
||||
" 'images/3_72.jpg',\n",
|
||||
" 'images/3_73.jpg',\n",
|
||||
" 'images/3_74.jpg',\n",
|
||||
" 'images/3_75.jpg',\n",
|
||||
" 'images/3_76.jpg',\n",
|
||||
" 'images/3_77.jpg',\n",
|
||||
" 'images/3_78.jpg',\n",
|
||||
" 'images/3_8.jpg',\n",
|
||||
" 'images/3_9.jpg',\n",
|
||||
" 'images/4_10.jpg',\n",
|
||||
" 'images/4_11.jpg',\n",
|
||||
" 'images/4_13.jpg',\n",
|
||||
" 'images/4_14.jpg',\n",
|
||||
" 'images/4_21.jpg',\n",
|
||||
" 'images/4_6.jpg',\n",
|
||||
" 'images/4_7.jpg',\n",
|
||||
" 'images/4_9.jpg',\n",
|
||||
" 'images/5_1.jpg',\n",
|
||||
" 'images/5_2.jpg',\n",
|
||||
" 'images/5_3.jpg',\n",
|
||||
" 'images/5_4.jpg',\n",
|
||||
" 'images/5_5.jpg',\n",
|
||||
" 'images/5_6.jpg']"
|
||||
]
|
||||
},
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import re\n",
|
||||
"import numpy as np\n",
|
||||
"import os\n",
|
||||
"import cv2\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"plt.rcParams['font.sans-serif'] = ['SimHei']\n",
|
||||
"plt.rcParams['axes.unicode_minus'] = False\n",
|
||||
"\n",
|
||||
"def getimgnames(path=None):\n",
|
||||
" \"\"\"\n",
|
||||
" 获取指定文件夹中的JPG图片名称(含路径)\n",
|
||||
" :param path: 指定文件夹\n",
|
||||
" :return: path中的所有JPG图片名称(含路径,例如:./path/image1.jpg)\n",
|
||||
" \"\"\"\n",
|
||||
" imgnames = []\n",
|
||||
" filenames = os.listdir(path) # 获取path中的所有文件名\n",
|
||||
" for i in filenames:\n",
|
||||
" if re.findall('\\.jpg$', i) != []: # 在所有文件名中找出JPG图片名称\n",
|
||||
" imgnames.append(os.path.join(path, i)) # 将图片名称和路径合并、保存\n",
|
||||
" return imgnames\n",
|
||||
"\n",
|
||||
"imglist = getimgnames('images/')\n",
|
||||
"imglist"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "356b558c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'\\ndef cut_image(img, imgFile):\\n cx = int(np.size(img, 1))/2\\n cy = int(np.size(img, 0))/2\\n \\n plt.figure(figsize=(8,8))\\n plt.imshow(img)\\n\\n plt.plot([cx-50, cx+50], [cy+50, cy+50], \\'r\\', linewidth=2)\\n plt.plot([cx+50, cx+50], [cy-50, cy+50], \\'r\\', linewidth=2)\\n plt.plot([cx-50, cx+50], [cy-50, cy-50], \\'r\\', linewidth=2)\\n plt.plot([cx-50, cx-50], [cy-50, cy+50], \\'r\\', linewidth=2)\\n plt.annotate(\\'选取的水样窗口\\', xy=(cx+50,cy-50), xytext=(cx+300, cy-300),\\n arrowprops=dict(facecolor=\\'black\\', shrink=0.1))\\n\\n plt.title(\\'水色样本 \\'+imgFile+\\' 分辨率为\\'+str(img.size)+\" 类别标签 \"+str(imgFile[9]))\\n plt.show()\\n \\nfor i in range(len(imglist)):\\n img = cv2.imread(imglist[i])\\n cut_image(img, imglist[i])\\n'"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"\"\"\"\n",
|
||||
"def cut_image(img, imgFile):\n",
|
||||
" cx = int(np.size(img, 1))/2\n",
|
||||
" cy = int(np.size(img, 0))/2\n",
|
||||
" \n",
|
||||
" plt.figure(figsize=(8,8))\n",
|
||||
" plt.imshow(img)\n",
|
||||
"\n",
|
||||
" plt.plot([cx-50, cx+50], [cy+50, cy+50], 'r', linewidth=2)\n",
|
||||
" plt.plot([cx+50, cx+50], [cy-50, cy+50], 'r', linewidth=2)\n",
|
||||
" plt.plot([cx-50, cx+50], [cy-50, cy-50], 'r', linewidth=2)\n",
|
||||
" plt.plot([cx-50, cx-50], [cy-50, cy+50], 'r', linewidth=2)\n",
|
||||
" plt.annotate('选取的水样窗口', xy=(cx+50,cy-50), xytext=(cx+300, cy-300),\n",
|
||||
" arrowprops=dict(facecolor='black', shrink=0.1))\n",
|
||||
"\n",
|
||||
" plt.title('水色样本 '+imgFile+' 分辨率为'+str(img.size)+\" 类别标签 \"+str(imgFile[9]))\n",
|
||||
" plt.show()\n",
|
||||
" \n",
|
||||
"for i in range(len(imglist)):\n",
|
||||
" img = cv2.imread(imglist[i])\n",
|
||||
" cut_image(img, imglist[i])\n",
|
||||
"\"\"\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "85e7fe5e",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# 加载图像统计信息模块(注:也可以直接通过颜色通道来计算)\n",
|
||||
"from PIL import ImageStat,Image\n",
|
||||
"\n",
|
||||
"# 遍历全体图像进行快速检查\n",
|
||||
"size = 100\n",
|
||||
"imgPath = './images'\n",
|
||||
"imgWidth = [] # 图像宽度\n",
|
||||
"imgHeight = [] # 图像高度\n",
|
||||
"imgRrange = [] # 图像红色通道极差\n",
|
||||
"imgGrange = [] # 图像绿色通道极差\n",
|
||||
"imgBrange = [] # 图像蓝色通道极差\n",
|
||||
"\n",
|
||||
"newImgs = [] # 获得选取后的图像作为模型训练和验证数据\n",
|
||||
"\n",
|
||||
"imgFiles = os.listdir(imgPath)\n",
|
||||
"for imgFile in imgFiles:\n",
|
||||
" img = Image.open(os.path.join(imgPath,imgFile))\n",
|
||||
" imgWidth.append(img.size[0])\n",
|
||||
" imgHeight.append(img.size[1])\n",
|
||||
" \n",
|
||||
" # 获得图像中心区域大小为size的图像块\n",
|
||||
" cx, cy = (int(i/2) for i in img.size)\n",
|
||||
" box = (cx-50, cy-50, cx+50, cy+50)\n",
|
||||
" region = img.crop(box)\n",
|
||||
" \n",
|
||||
" # 计算选取图像块的标准差 分为红绿蓝三种\n",
|
||||
" stat = ImageStat.Stat(region)\n",
|
||||
" imgRrange.append(stat.extrema[0][1]-stat.extrema[0][0])\n",
|
||||
" imgGrange.append(stat.extrema[1][1]-stat.extrema[1][0])\n",
|
||||
" imgBrange.append(stat.extrema[2][1]-stat.extrema[2][0])\n",
|
||||
" \n",
|
||||
" newImgs.append(region)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "b5ad4f91",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"<class 'pandas.core.frame.DataFrame'>\n",
|
||||
"RangeIndex: 161 entries, 0 to 160\n",
|
||||
"Data columns (total 9 columns):\n",
|
||||
" # Column Non-Null Count Dtype \n",
|
||||
"--- ------ -------------- ----- \n",
|
||||
" 0 0 161 non-null float64\n",
|
||||
" 1 1 161 non-null float64\n",
|
||||
" 2 2 161 non-null float64\n",
|
||||
" 3 3 161 non-null float64\n",
|
||||
" 4 4 161 non-null float64\n",
|
||||
" 5 5 161 non-null float64\n",
|
||||
" 6 6 161 non-null float64\n",
|
||||
" 7 7 161 non-null float64\n",
|
||||
" 8 8 161 non-null float64\n",
|
||||
"dtypes: float64(9)\n",
|
||||
"memory usage: 11.4 KB\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div>\n",
|
||||
"<style scoped>\n",
|
||||
" .dataframe tbody tr th:only-of-type {\n",
|
||||
" vertical-align: middle;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe tbody tr th {\n",
|
||||
" vertical-align: top;\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
" .dataframe thead th {\n",
|
||||
" text-align: right;\n",
|
||||
" }\n",
|
||||
"</style>\n",
|
||||
"<table border=\"1\" class=\"dataframe\">\n",
|
||||
" <thead>\n",
|
||||
" <tr style=\"text-align: right;\">\n",
|
||||
" <th></th>\n",
|
||||
" <th>0</th>\n",
|
||||
" <th>1</th>\n",
|
||||
" <th>2</th>\n",
|
||||
" <th>3</th>\n",
|
||||
" <th>4</th>\n",
|
||||
" <th>5</th>\n",
|
||||
" <th>6</th>\n",
|
||||
" <th>7</th>\n",
|
||||
" <th>8</th>\n",
|
||||
" </tr>\n",
|
||||
" </thead>\n",
|
||||
" <tbody>\n",
|
||||
" <tr>\n",
|
||||
" <th>0</th>\n",
|
||||
" <td>148.6041</td>\n",
|
||||
" <td>138.6381</td>\n",
|
||||
" <td>64.3694</td>\n",
|
||||
" <td>3.625902</td>\n",
|
||||
" <td>4.125497</td>\n",
|
||||
" <td>10.485931</td>\n",
|
||||
" <td>4.242425</td>\n",
|
||||
" <td>4.798916</td>\n",
|
||||
" <td>12.044228</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>1</th>\n",
|
||||
" <td>163.6788</td>\n",
|
||||
" <td>145.5487</td>\n",
|
||||
" <td>54.4581</td>\n",
|
||||
" <td>3.923370</td>\n",
|
||||
" <td>2.835001</td>\n",
|
||||
" <td>3.478972</td>\n",
|
||||
" <td>4.439465</td>\n",
|
||||
" <td>3.239916</td>\n",
|
||||
" <td>4.039823</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>2</th>\n",
|
||||
" <td>153.9485</td>\n",
|
||||
" <td>147.0810</td>\n",
|
||||
" <td>71.9576</td>\n",
|
||||
" <td>2.208766</td>\n",
|
||||
" <td>1.803397</td>\n",
|
||||
" <td>3.115478</td>\n",
|
||||
" <td>2.623796</td>\n",
|
||||
" <td>2.139289</td>\n",
|
||||
" <td>3.621357</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>3</th>\n",
|
||||
" <td>150.3755</td>\n",
|
||||
" <td>151.3985</td>\n",
|
||||
" <td>64.3118</td>\n",
|
||||
" <td>2.015167</td>\n",
|
||||
" <td>1.514034</td>\n",
|
||||
" <td>2.698922</td>\n",
|
||||
" <td>2.344658</td>\n",
|
||||
" <td>1.809065</td>\n",
|
||||
" <td>3.136727</td>\n",
|
||||
" </tr>\n",
|
||||
" <tr>\n",
|
||||
" <th>4</th>\n",
|
||||
" <td>150.7380</td>\n",
|
||||
" <td>150.9738</td>\n",
|
||||
" <td>64.6246</td>\n",
|
||||
" <td>1.902934</td>\n",
|
||||
" <td>1.658045</td>\n",
|
||||
" <td>3.098044</td>\n",
|
||||
" <td>2.242270</td>\n",
|
||||
" <td>1.952067</td>\n",
|
||||
" <td>3.593836</td>\n",
|
||||
" </tr>\n",
|
||||
" </tbody>\n",
|
||||
"</table>\n",
|
||||
"</div>"
|
||||
],
|
||||
"text/plain": [
|
||||
" 0 1 2 3 4 5 6 \\\n",
|
||||
"0 148.6041 138.6381 64.3694 3.625902 4.125497 10.485931 4.242425 \n",
|
||||
"1 163.6788 145.5487 54.4581 3.923370 2.835001 3.478972 4.439465 \n",
|
||||
"2 153.9485 147.0810 71.9576 2.208766 1.803397 3.115478 2.623796 \n",
|
||||
"3 150.3755 151.3985 64.3118 2.015167 1.514034 2.698922 2.344658 \n",
|
||||
"4 150.7380 150.9738 64.6246 1.902934 1.658045 3.098044 2.242270 \n",
|
||||
"\n",
|
||||
" 7 8 \n",
|
||||
"0 4.798916 12.044228 \n",
|
||||
"1 3.239916 4.039823 \n",
|
||||
"2 2.139289 3.621357 \n",
|
||||
"3 1.809065 3.136727 \n",
|
||||
"4 1.952067 3.593836 "
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import pandas as pd\n",
|
||||
"# 构建训练数据集和分类标签\n",
|
||||
"data = []\n",
|
||||
"dy = []\n",
|
||||
"for i, img in enumerate(newImgs):\n",
|
||||
" r, g, b = np.split(np.array(img), 3, axis = 2)\n",
|
||||
" \n",
|
||||
" #计算一阶矩\n",
|
||||
" r_m1 = np.mean(r)\n",
|
||||
" g_m1 = np.mean(g)\n",
|
||||
" b_m1 = np.mean(b)\n",
|
||||
" \n",
|
||||
" #二阶矩\n",
|
||||
" r_m2 = np.std(r)\n",
|
||||
" g_m2 = np.std(g)\n",
|
||||
" b_m2 = np.std(b)\n",
|
||||
" \n",
|
||||
" #三阶矩\n",
|
||||
" r_m3 = np.mean(abs(r - r.mean())**3)**(1/3)\n",
|
||||
" g_m3 = np.mean(abs(g - g.mean())**3)**(1/3)\n",
|
||||
" b_m3 = np.mean(abs(b - b.mean())**3)**(1/3)\n",
|
||||
" \n",
|
||||
" # 构造新数据集\n",
|
||||
" df = np.array([r_m1,g_m1,b_m1,r_m2,g_m2,b_m2,r_m3,g_m3,b_m3])\n",
|
||||
" data.append(df)\n",
|
||||
" \n",
|
||||
" # 保存对应的分类标签\n",
|
||||
" dy.append(int(imgFiles[i][0]))\n",
|
||||
"\n",
|
||||
"dy = np.array(dy)\n",
|
||||
"data = pd.DataFrame(np.array(data))\n",
|
||||
"data.info()\n",
|
||||
"data.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "da56ab61",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"D:\\anaconda\\lib\\site-packages\\scipy\\__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.24.3\n",
|
||||
" warnings.warn(f\"A NumPy version >={np_minversion} and <{np_maxversion}\"\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"((128, 9), (33, 9), (128,))"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from sklearn.model_selection import train_test_split\n",
|
||||
"x_train,x_test,y_train,y_test=train_test_split(data,dy,test_size=0.2,random_state=0)\n",
|
||||
"x_train.shape,x_test.shape,y_train.shape"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "8c6be264",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"0.7575757575757576\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "stderr",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"D:\\anaconda\\lib\\site-packages\\sklearn\\neural_network\\_multilayer_perceptron.py:692: ConvergenceWarning: Stochastic Optimizer: Maximum iterations (200) reached and the optimization hasn't converged yet.\n",
|
||||
" warnings.warn(\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from sklearn.metrics import accuracy_score\n",
|
||||
"from sklearn.neural_network import MLPClassifier\n",
|
||||
"mlp=MLPClassifier()\n",
|
||||
"mlp.fit(x_train,y_train)\n",
|
||||
"y_pred=mlp.predict(x_test)\n",
|
||||
"print(accuracy_score(y_pred,y_test))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "d7c33fc8",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user