新增数据分析报告生成
This commit is contained in:
Generated
+3
-1
@@ -13,7 +13,9 @@
|
||||
</facet>
|
||||
</component>
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$" />
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<sourceFolder url="file://$MODULE_DIR$" isTestSource="false" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="Python 3.6 (Auto_maching_learning)" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
|
||||
Generated
+1
-2
@@ -1,8 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="Encoding">
|
||||
<file url="file://$PROJECT_DIR$/temp/03833.csv" charset="GBK" />
|
||||
<file url="file://$PROJECT_DIR$/temp/喷子终极语录总.txt" charset="GBK" />
|
||||
<file url="file://$PROJECT_DIR$/tree.txt" charset="GBK" />
|
||||
<file url="file://$PROJECT_DIR$/utils/generate.py" charset="GBK" />
|
||||
</component>
|
||||
</project>
|
||||
+15
-8
@@ -1,15 +1,22 @@
|
||||
from django.urls import path
|
||||
from ModelSelection import views
|
||||
from django.conf.urls import url
|
||||
from django.views.generic.base import TemplateView
|
||||
|
||||
from .views import *
|
||||
|
||||
app_name = 'api'
|
||||
urlpatterns = [
|
||||
path('', TemplateView.as_view(template_name='index.html')),
|
||||
path('index/', TemplateView.as_view(template_name='index.html')),
|
||||
path('upload_dataset',upload_dataset),
|
||||
path('get_data_list',get_data_list),
|
||||
path('show_dataset',show_dataset),
|
||||
path('del_dataset',del_dataset),
|
||||
path('generate_code',generate_code)
|
||||
]
|
||||
path('check_exist', check_exist), # 注册校验用户是否存在
|
||||
path('send_code', send_code), # 发送验证码
|
||||
path('regist', regist), # 注册
|
||||
path('login', login), # 登录
|
||||
path('upload_dataset', upload_dataset), # 上传数据集文件
|
||||
path('get_data_list', get_data_list), # 获取数据集列表
|
||||
path('show_dataset', show_dataset), # 预览数据集
|
||||
path('show_dataset_report', show_dataset_report), # 预览数据报告
|
||||
path('del_dataset', del_dataset), # 删除数据集
|
||||
path('generate_code', generate_code), # 生成代码
|
||||
path('export_code', export_code), # 导出代码文件
|
||||
|
||||
]
|
||||
|
||||
+215
-79
@@ -1,126 +1,234 @@
|
||||
from django.shortcuts import render
|
||||
from django.http import JsonResponse,HttpResponse
|
||||
from django.views.decorators.http import require_http_methods
|
||||
from utils.dataset_process import DatasetProcess
|
||||
from utils.model_choose import SetModel
|
||||
import json
|
||||
import os
|
||||
import traceback
|
||||
import json
|
||||
import pandas as pd
|
||||
from django.http import JsonResponse, HttpResponse
|
||||
from django.shortcuts import render
|
||||
from django.views.decorators.http import require_http_methods
|
||||
|
||||
from utils.dataset_process import DatasetProcess
|
||||
from utils.model_choose import SetModel
|
||||
from utils.user_operate import UserProcess, Regist, sendCode
|
||||
|
||||
# Create your views here.
|
||||
|
||||
UP = UserProcess()
|
||||
|
||||
|
||||
def get_datesets_list(request):
    """Render the built front-end index page for the given request."""
    template_name = "../templates/dist/index.html"
    return render(request, template_name)
|
||||
|
||||
|
||||
@require_http_methods(['POST'])
def login(request):
    """Authenticate a user from a JSON request body.

    The body must be a JSON object carrying ``username`` and ``password``;
    both are handed to ``UP.login``.

    :param request: Django request whose body is a JSON document.
    :return: ``JsonResponse({"msg": <result of UP.login>})`` on success, or
        ``{"msg": False}`` with HTTP 500 when the body cannot be parsed or
        the lookup raises.
    """
    try:
        postBody = json.loads(request.body)
        username = postBody.get("username")
        password = postBody.get("password")
        res = UP.login(username, password)
        return JsonResponse({"msg": res})
    except Exception:
        # The previous handler returned ``res`` here, but ``res`` is unbound
        # whenever the failure happens before UP.login() returns, so the
        # handler itself crashed. Report an explicit failure instead.
        traceback.print_exc()
        return JsonResponse({'msg': False}, status=500)
|
||||
|
||||
|
||||
@require_http_methods(['POST'])
def check_exist(request):
    """Report whether a user matching the submitted query already exists.

    The body must be a JSON object with a ``query_dict`` entry, which is
    passed to ``UP.check_exist``.

    :param request: Django request whose body is a JSON document.
    :return: ``JsonResponse({"msg": <result of UP.check_exist>})`` on success,
        or ``{"msg": False}`` with HTTP 500 when parsing or the lookup fails.
    """
    try:
        postBody = json.loads(request.body)
        query_dict = postBody.get("query_dict")
        res = UP.check_exist(query_dict)
        return JsonResponse({"msg": res})
    except Exception:
        # ``res`` is unbound when the failure happens before the lookup
        # returns, so it must not be referenced in this branch.
        traceback.print_exc()
        return JsonResponse({'msg': False}, status=500)
|
||||
|
||||
|
||||
@require_http_methods(['POST'])
def regist(request):
    """Register a new user from a JSON request body.

    Reads ``username``, ``password``, ``email``, ``phone``, ``isVip``
    (default False) and ``checkcode`` from the body and delegates to
    ``Regist``. The response carries ``msg`` and ``code``; ``code`` is also
    used as the HTTP status (200 on success, 500 on any failure).
    """
    data = {"msg": None, "code": None}
    try:
        body = json.loads(request.body)
        infos = {
            "username": body.get('username'),
            "password": body.get('password'),
            "phone": body.get("phone"),
            "email": body.get("email"),
            "isVip": body.get("isVip", False),
            "dataset": [],
        }
        registered = Regist(infos, check_code=body.get("checkcode"))
        if not registered:
            raise Exception("验证码错误,注册失败")
        data['code'] = 200
        data['msg'] = '注册成功'
    except Exception as e:
        data['code'] = 500
        data['msg'] = str(e)
    return JsonResponse(data, status=data['code'])
|
||||
|
||||
|
||||
@require_http_methods(['POST'])
def send_code(request):
    """Send a verification code to the e-mail address given in the JSON body.

    :param request: Django request whose JSON body contains ``email``.
    :return: ``JsonResponse({"msg": <bool>})``; HTTP 200 with the flag
        returned by ``sendCode``, or ``{"msg": False}`` with HTTP 500 when
        the body cannot be parsed or sending raises.
    """
    try:
        postBody = json.loads(request.body)
        email = postBody.get("email")
        flag = sendCode(email)
    except Exception:
        # Malformed JSON or a mail-transport failure used to escape as an
        # unhandled exception; answer in the same JSON shape instead.
        traceback.print_exc()
        return JsonResponse({"msg": False}, status=500)
    return JsonResponse({"msg": flag}, status=200)
|
||||
|
||||
|
||||
@require_http_methods(['POST'])
def upload_dataset(request):
    """Store the uploaded dataset file(s) for a user.

    Each file in the ``file`` form field is written to ``temp/``, handed to
    ``DatasetProcess.upload`` and then removed again. ``msg`` carries the
    message returned by ``upload``; ``code`` doubles as the HTTP status.

    :param request: multipart POST with ``username`` and one or more ``file``.
    :return: JsonResponse with ``msg``, ``code`` and ``data``.
    """
    data = {
        "msg": None,
        "code": None,
        "data": None
    }
    try:
        # A missing username used to raise AttributeError outside the try
        # block; normalise it here so every failure yields the JSON payload.
        username = (request.POST.get("username") or "").replace('"', '')
        print("upload username:" + username)
        dp = DatasetProcess(username=username)
        upload_file = request.FILES.getlist('file')
        for f in upload_file:
            data['msg'] = None
            save_path = os.path.join("temp", f.name)
            try:
                with open(save_path, "wb") as des:
                    for chunk in f.chunks():
                        des.write(chunk)
                res = dp.upload(save_path, username)
            finally:
                # Remove the temporary file even when writing or upload()
                # raises, not only on the two explicit result branches.
                if os.path.exists(save_path):
                    os.remove(save_path)
            data['msg'] = res[1]
            if not res[0]:
                raise Exception("上传失败")
            data['code'] = 200
    except Exception as e:
        data['code'] = 500
        if data['msg'] is None:
            # Keep the message reported by upload() when there is one;
            # otherwise surface the exception text.
            data['msg'] = str(e)
        traceback.print_exc()
    return JsonResponse(data, safe=False, status=data['code'])
|
||||
|
||||
|
||||
@require_http_methods(['GET'])
def get_data_list(request):
    """Return the names and upload times of all datasets a user has stored.

    :param request: GET request with a ``username`` query parameter.
    :return: JsonResponse whose ``data`` holds ``name`` and ``upload_time``
        lists; ``code`` is 200 on success and 500 (with ``msg``) on failure.
    """
    data = {
        "msg": None,
        "code": None,
        "data": {}
    }
    try:
        # Parameter handling lives inside the try block so a missing or
        # unknown username produces the JSON error payload.
        username = (request.GET.get('username') or '').replace('"', '')
        dp = DatasetProcess(username=username)
        names, upload_times = dp.get_dataset_info()
        data['data']['name'] = names
        data['data']['upload_time'] = upload_times
        # ``code`` was previously left as None on the success path.
        data['code'] = 200
    except Exception as e:
        traceback.print_exc()
        data['msg'] = str(e)
        data['code'] = 500
    return JsonResponse(data, status=data['code'], safe=False)
|
||||
|
||||
|
||||
@require_http_methods(['GET'])
def show_dataset(request):
    """Return the full contents of the dataset selected by the user.

    Missing values are replaced by ``""`` (the front end cannot display
    them otherwise) and the column names are added under ``cols``.

    :param request: GET request with ``username`` and ``dataset_name``.
    :return: JsonResponse with ``msg``, ``code`` and ``data``; ``code`` is
        also the HTTP status.
    """
    data = {
        "msg": None,
        "code": None,
        "data": None
    }
    try:
        username = (request.GET.get('username') or '').replace('"', '')
        dp = DatasetProcess(username=username)
        dataset_name = request.GET.get("dataset_name")
        data_dict = dp.get_dataset(dataset_name)
        # Check for a failed load BEFORE the DataFrame round-trip: to_dict()
        # never returns None, so the check was unreachable where it stood.
        if data_dict is None:
            raise Exception("数据加载失败")
        data_dict = pd.DataFrame(data_dict).fillna("").to_dict(orient='list')
        data['data'] = data_dict
        data['data']['cols'] = list(data_dict.keys())
        data['code'] = 200
        data['msg'] = "Success"
    except Exception as e:
        data['code'] = 500
        data['msg'] = str(e)
        traceback.print_exc()
    return JsonResponse(data, status=data['code'], json_dumps_params={'ensure_ascii': False})
|
||||
|
||||
|
||||
@require_http_methods(['GET'])
def show_dataset_report(request):
    """Generate the analysis report for a dataset and return its file name.

    :param request: GET request with ``username`` and ``dataset_name``.
    :return: JsonResponse whose ``data`` is the value returned by
        ``DatasetProcess.generate_report``; ``code`` is 200 on success and
        500 (with ``msg``) on failure.
    """
    data = {
        "msg": None,
        "code": None,
        "data": ""
    }
    try:
        # Reading the parameters and building DatasetProcess can both fail
        # (missing parameter, unknown user); keep them inside the try block
        # so the caller always receives the JSON payload.
        username = (request.GET.get('username') or '').replace('"', '')
        dataset_name = (request.GET.get('dataset_name') or '').replace('"', '')
        dp = DatasetProcess(username=username)
        html_name = dp.generate_report(dataset_name)
        data["data"] = html_name
        data["code"] = 200
    except Exception as e:
        traceback.print_exc()
        data['msg'] = str(e)
        data['code'] = 500
    return JsonResponse(data, status=data['code'], safe=False)
|
||||
|
||||
|
||||
@require_http_methods(['POST'])
def del_dataset(request):
    """Delete one of the user's datasets.

    :param request: POST request whose JSON body has ``username`` and
        ``dataset_name``.
    :return: JsonResponse with ``msg`` and ``code``; ``code`` is 200 on
        success and 500 on failure, and is also the HTTP status.
    """
    data = {
        "msg": None,
        "code": None
    }
    try:
        postBody = json.loads(request.body)
        # ``or ''`` keeps a missing field from raising AttributeError on
        # ``.replace``; an empty dataset name falls through to "删除失败".
        username = (postBody.get('username') or '').replace('"', '')
        dataset_name = (postBody.get("dataset_name") or '').replace('"', '')
        # Only one DatasetProcess is built, for the requesting user. The
        # earlier extra instance for the hard-coded "admin" user ran outside
        # the try block and was never used.
        dp = DatasetProcess(username=username)
        if dataset_name and dp.delete(dataset_name):
            data['code'] = 200
            data['msg'] = dataset_name + " 删除成功 !"
        else:
            raise Exception("删除失败")
    except Exception as e:
        traceback.print_exc()
        data['msg'] = str(e)
        data['code'] = 500
    return JsonResponse(data, status=data['code'])
|
||||
|
||||
|
||||
@require_http_methods(['POST'])
|
||||
@@ -130,25 +238,53 @@ def generate_code(request):
|
||||
"code": None
|
||||
}
|
||||
try:
|
||||
postBody=json.loads(request.body)
|
||||
username=postBody.pop('username')
|
||||
postBody=postBody.get('data')
|
||||
postBody = json.loads(request.body)
|
||||
username = postBody.pop('username')
|
||||
postBody = postBody.get('data')
|
||||
name = postBody.get('name')
|
||||
dataset_name = postBody.get('dataset_name')
|
||||
features = postBody.get('features')
|
||||
target = postBody.get('target')
|
||||
model_type = postBody.get('model_type')
|
||||
model_name = postBody.get('models')
|
||||
evaluate_methods = postBody.get("metrics")
|
||||
myModel = SetModel(name, dataset_name, features, target, model_type, model_name, username, evaluate_methods, )
|
||||
|
||||
dataset_name=postBody.get('dataset_name')
|
||||
features=postBody.get('features')
|
||||
target=postBody.get('target')
|
||||
model_type=postBody.get('model_type')
|
||||
model_name=postBody.get('models')
|
||||
evaluate_methods=postBody.get("metrics")
|
||||
myModel=SetModel(dataset_name,features,target,model_type,model_name,evaluate_methods)
|
||||
|
||||
codes=myModel.get_code()
|
||||
codes = myModel.get_code()
|
||||
if codes:
|
||||
data['data']=codes
|
||||
data['msg']="上传成功"
|
||||
data['code']=200
|
||||
return JsonResponse(data,status=data['code'])
|
||||
data['data'] = codes
|
||||
data['msg'] = "上传成功"
|
||||
data['code'] = 200
|
||||
return JsonResponse(data, status=data['code'])
|
||||
except Exception as e:
|
||||
traceback.print_exc()
|
||||
return JsonResponse({'msg':str(e)})
|
||||
return JsonResponse({'msg': str(e)})
|
||||
|
||||
|
||||
@require_http_methods(['GET'])
def export_code(request):
    """Send a previously generated code file to the client as a download.

    The file is looked up as ``temp/generate_<username>_<name>.py`` relative
    to the current working directory.

    :param request: GET request with ``username`` and ``name``.
    :return: an attachment response with the file contents, or a plain
        HttpResponse containing the error text when the file is unavailable.
    """
    try:
        username = request.GET.get('username')
        name = request.GET.get('name')
        filename = "generate_{}_{}.py".format(username, name)
        # username/name come straight from the query string: refuse anything
        # that would add a path component and escape the temp directory.
        if filename != os.path.basename(filename) or '\\' in filename:
            raise Exception('文件不存在')
        filepath = os.path.join(os.path.abspath(''), 'temp', filename)
        if not os.path.exists(filepath):
            raise Exception('文件不存在')
        with open(filepath, 'rb') as f:
            response = HttpResponse(f.read())
        response['Content-Type'] = 'application/octet-stream'
        response['Content-Disposition'] = 'attachment;filename=' + filename
        # response['Set-Cookie'] = "fileDownload=true; path=/"  # needed if the front end uses a download plugin
        return response
    except Exception as e:
        traceback.print_exc()
        return HttpResponse(str(e))
|
||||
|
||||
+1
-1
@@ -25,4 +25,4 @@ MODEL_DICT = {
|
||||
'混淆矩阵': 'plot_confusion_matrix.py'
|
||||
}
|
||||
if __name__ == '__main__':
|
||||
pass
|
||||
print(MODEL_DICT['分类']['KNN'])
|
||||
|
||||
+3
-8
@@ -16,11 +16,6 @@ model = MODEL.fit(X_train, y_train)
|
||||
#预测
|
||||
y_pred = model.predict(X_test)
|
||||
# 模型评估
|
||||
# 绘制混淆矩阵
|
||||
cnf_matrix = confusion_matrix(y_test, y_pred)
|
||||
np.set_printoptions(precision=len(y.unique())) # 设置打印数量的阈值
|
||||
class_names = y.unique()
|
||||
test_report = classification_report(y_test, y_pred)
|
||||
print(test_report)
|
||||
plot_confusion_matrix(cnf_matrix, classes=class_names, title='Confusion matrix')
|
||||
plot_ROC_curve(y_test, y_pred)
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -8,3 +8,4 @@ def plot_ROC_curve(y_test, y_predict):
|
||||
plt.ylabel('TPR')
|
||||
plt.xlabel('FPR')
|
||||
plt.show()
|
||||
plot_ROC_curve(y_test, y_pred)
|
||||
@@ -18,4 +18,13 @@ def plot_confusion_matrix(cm, classes, normalize=False,title='Confusion matrix',
|
||||
plt.tight_layout()
|
||||
plt.ylabel('True label')
|
||||
plt.xlabel('Predicted label')
|
||||
plt.show()
|
||||
plt.show()
|
||||
|
||||
#分类评估报告
|
||||
test_report = classification_report(y_test, y_pred)
|
||||
print(test_report)
|
||||
# 绘制混淆矩阵
|
||||
cnf_matrix = confusion_matrix(y_test, y_pred)
|
||||
np.set_printoptions(precision=len(y.unique())) # 设置打印数量的阈值
|
||||
class_names = y.unique()
|
||||
plot_confusion_matrix(cnf_matrix, classes=class_names, title='Confusion matrix')
|
||||
+78
-34
@@ -3,49 +3,93 @@
|
||||
'''
|
||||
|
||||
import smtplib
|
||||
import pymongo
|
||||
from email.mime.text import MIMEText
|
||||
from email.header import Header
|
||||
import random
|
||||
import time
|
||||
from temp import EmailTemp
|
||||
|
||||
# 用于构建邮件头
|
||||
|
||||
# 发信方的信息:发信邮箱,QQ 邮箱授权码
|
||||
password = ''
|
||||
# 收信方邮箱
|
||||
to_addr = ''
|
||||
def send_email(password, to_addr):
|
||||
'''
|
||||
:param password: Email authorization code
|
||||
:param to_addr: recevier email address
|
||||
:return:{
|
||||
to_addr:收件人邮箱地址
|
||||
time:发送成功后的时间戳
|
||||
}
|
||||
'''
|
||||
from_addr = 'yikechengxushu@qq.com'
|
||||
smtp_server = 'smtp.qq.com'
|
||||
# 邮箱正文内容,第一个参数为内容,第二个参数为格式(plain 为纯文本),第三个参数为编码
|
||||
validate_num=random.randint(100000,999999)
|
||||
msg = MIMEText("你的验证码(注意,验证码仅5分钟内有效):{}".format(validate_num), 'plain', 'utf-8')
|
||||
# 邮件头信息
|
||||
msg['From'] = Header(from_addr)
|
||||
msg['To'] = Header(to_addr)
|
||||
msg['Subject'] = Header('随机验证码')
|
||||
to_addr = '526494747@qq.com'
|
||||
class EmailService():
    """Send e-mail verification codes and validate them against MongoDB.

    Each sent code is stored in the ``temp`` collection of the ``AML``
    database together with the recipient address and the send timestamp.
    """

    def __init__(self):
        self.client = pymongo.MongoClient(host="localhost", port=27017)
        self.mydb = self.client["AML"]
        self.temp_collection = self.mydb["temp"]
        # Lifetime of a verification code, in seconds.
        self.valid_time = 60 * 5

    # NOTE(review): the default ``password`` is a mailbox authorization code
    # committed to source control; it is kept so existing callers keep
    # working, but it should be rotated and loaded from configuration.
    def send_email(self, to_addr, password='htvviggqfrwobbfc'):
        '''
        Send a six-digit verification code to ``to_addr`` and record it.

        :param to_addr: receiver e-mail address
        :param password: e-mail authorization code of the sending mailbox
        :return: the stored record with ``address``, ``check_code`` and
            ``send_time`` (``insert_one`` may add ``_id`` to the same dict)
        '''
        # Function-scope import: ``secrets`` is not imported at file level.
        import secrets

        from_addr = 'yikechengxushu@qq.com'
        smtp_server = 'smtp.qq.com'
        # The code gates account creation, so draw it from the CSPRNG rather
        # than ``random``; the range stays 100000..999999.
        validate_num = 100000 + secrets.randbelow(900000)
        # MIMEText arguments: body, subtype ("plain" text), charset.
        msg = MIMEText("你的验证码(注意,验证码仅{}分钟内有效):{}".format(int(self.valid_time / 60), validate_num), 'plain', 'utf-8')
        msg['From'] = Header(from_addr)
        msg['To'] = Header(to_addr)
        msg['Subject'] = Header('随机验证码')

        # Pass the host to the constructor: SMTP_SSL() followed by connect()
        # has no server hostname for the TLS handshake and fails on Python
        # 3.7+. The context manager closes the connection on errors too.
        with smtplib.SMTP_SSL(smtp_server, 465) as server:
            server.login(from_addr, password)
            server.sendmail(from_addr, to_addr, msg.as_string())

        send_info = {
            'address': to_addr,
            'check_code': validate_num,
            'send_time': time.time()
        }
        # Keep a single pending code per address.
        self.temp_collection.delete_many({"address": to_addr})
        self.temp_collection.insert_one(send_info)
        return send_info

    def check_input(self, text, address):
        '''
        Validate a verification code entered by the user.

        :param text: the code the user typed (string or number)
        :param address: the e-mail address the code was sent to
        :return: True when the code matches and has not expired, else False.
            The stored record is deleted on expiry and on a successful match.
        '''
        query = {"address": address}
        regist_info = self.temp_collection.find_one(query)
        if not regist_info:
            return False
        send_time = regist_info.get('send_time')
        if time.time() - send_time > self.valid_time:
            self.temp_collection.delete_one(query)
            return False
        # Compare as strings on both sides so a code submitted as a JSON
        # number still matches the stored integer.
        if str(text) == str(regist_info.get("check_code")):
            self.temp_collection.delete_one(query)
            print(address + '注册成功')
            return True
        return False
|
||||
|
||||
if __name__ == '__main__':
|
||||
pass
|
||||
# start=time.time()
|
||||
# em=EmailService()
|
||||
# mail="526494747@qq.com"
|
||||
# # em.send_email(mail)
|
||||
# a=em.check_input('156882',mail)
|
||||
# end=time.time()
|
||||
# print("发送校验耗时:",start-end)
|
||||
# print(a)
|
||||
|
||||
password='htvviggqfrwobbfc'
|
||||
print(send_email(password,to_addr))
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
'''
|
||||
1、缺失值填充(默认填0,也可输入)
|
||||
2、相关性分析
|
||||
3、数据映射
|
||||
4、根据某列统计(计数、求和,均值)
|
||||
'''
|
||||
OPERATIONS={
|
||||
|
||||
}
|
||||
|
||||
+59
-44
@@ -5,60 +5,65 @@
|
||||
删除数据文件
|
||||
'''
|
||||
|
||||
import pymongo
|
||||
import pandas as pd
|
||||
from ModelSelection.models import UserModel,DatasetModel
|
||||
import traceback
|
||||
import os
|
||||
import time
|
||||
import traceback
|
||||
|
||||
import pandas as pd
|
||||
import pandas_profiling
|
||||
import pymongo
|
||||
|
||||
from ModelSelection.models import DatasetModel
|
||||
|
||||
|
||||
# username='root'
|
||||
# password='lzh.mongo.admin'
|
||||
# url='47.97.197.244'
|
||||
# port=27017
|
||||
class DatasetProcess():
|
||||
def __init__(self,database="AML",collection="user_model",username="admin"):
|
||||
self.client =pymongo.MongoClient(host="localhost",port=27017)
|
||||
self.mydb=self.client[database]
|
||||
self.user_collection=self.mydb[collection]
|
||||
self.username=username
|
||||
def __init__(self, database="AML", collection="user_model", username="admin"):
|
||||
self.client = pymongo.MongoClient(host="localhost", port=27017)
|
||||
self.mydb = self.client[database]
|
||||
self.user_collection = self.mydb[collection]
|
||||
self.username = username
|
||||
self.user = self.user_collection.find_one({"username": self.username})
|
||||
self.isVip=self.user.get('isVip')
|
||||
self.datasets= [i.get('name') for i in self.user['dataset']]
|
||||
self.isVip = self.user.get('isVip', False)
|
||||
self.datasets = [i.get('name') for i in self.user['dataset']]
|
||||
self.columns = []
|
||||
self.DM=DatasetModel.objects()
|
||||
def get_dataset_info(self):
|
||||
names=[i.get('name') for i in self.user['dataset']]
|
||||
upload_times=[i.get('upload_time') for i in self.user['dataset']]
|
||||
return names,upload_times
|
||||
|
||||
self.DM = DatasetModel.objects()
|
||||
|
||||
def upload(self,file_path,username):
|
||||
def get_dataset_info(self):
|
||||
names = [i.get('name') for i in self.user['dataset']]
|
||||
upload_times = [i.get('upload_time') for i in self.user['dataset']]
|
||||
return names, upload_times
|
||||
|
||||
def upload(self, file_path, username):
|
||||
'''
|
||||
:param file_path: 用户上传的文件路径
|
||||
:return :是否成功上传的bool值
|
||||
'''
|
||||
#文件后缀检查
|
||||
postfix=os.path.split(file_path)[-1].split(".")
|
||||
# 文件后缀检查
|
||||
postfix = os.path.split(file_path)[-1].split(".")
|
||||
filename = postfix[0] + "_" + postfix[1]
|
||||
if not self.isVip:
|
||||
return False,"非会员最多存储五份数据集"
|
||||
if (len(self.datasets) > 5):
|
||||
return False, "非会员最多存储五份数据集"
|
||||
if filename in self.datasets:
|
||||
return False,"该数据集已存在"
|
||||
return False, "该数据集已存在"
|
||||
try:
|
||||
if postfix[1]=='xls' or postfix[1]=='xlsx':
|
||||
df=pd.read_excel(file_path)
|
||||
elif postfix[1]=="csv" or postfix[1]=='txt':
|
||||
df = pd.read_csv(file_path,encoding="utf-8")
|
||||
if postfix[1] == 'xls' or postfix[1] == 'xlsx':
|
||||
df = pd.read_excel(file_path)
|
||||
elif postfix[1] == "csv" or postfix[1] == 'txt':
|
||||
df = pd.read_csv(file_path, encoding="utf-8")
|
||||
except UnicodeDecodeError as e:
|
||||
df = pd.read_csv(file_path, encoding="gbk")
|
||||
except Exception as e:
|
||||
traceback.print_exc()
|
||||
return False,str(e)
|
||||
#将dataframe转换为字典形式
|
||||
cols=df.columns
|
||||
data = {i: df[i].tolist() for i in cols }
|
||||
upload_time=time.time()
|
||||
return False, str(e)
|
||||
# 将dataframe转换为字典形式
|
||||
cols = df.columns
|
||||
data = {i: df[i].tolist() for i in cols}
|
||||
upload_time = time.time()
|
||||
try:
|
||||
try:
|
||||
self.DM.create(
|
||||
@@ -83,36 +88,48 @@ class DatasetProcess():
|
||||
except Exception as e:
|
||||
traceback.print_exc()
|
||||
return False, str(e)
|
||||
return True,"上传成功"
|
||||
return True, "上传成功"
|
||||
|
||||
def get_dataset(self, dataset_name):
    '''
    Fetch a stored dataset from the ``dataset_model`` collection.

    :param dataset_name: name of the dataset
    :return: the ``data`` field of the matching document
    :raises ValueError: when no document matches this user and dataset name
    '''
    model = self.mydb['dataset_model']
    query = dict(dataset_name=dataset_name, username=self.username)
    res = model.find_one(query)
    if res is None:
        # find_one() returns None on a miss; indexing it produced an opaque
        # "'NoneType' object is not subscriptable" error.
        raise ValueError("dataset %r not found for user %r" % (dataset_name, self.username))
    return res['data']
|
||||
|
||||
# todo 后续确保filename为相对路径
|
||||
def generate_report(self, dataset_name, report_dir="E:/study/项目/AML_frontend/static"):
    '''
    Build (or reuse) the profiling report of a dataset.

    The report is written to ``<report_dir>/<username>_<dataset_name>.html``
    and is only generated when that file does not exist yet.

    :param dataset_name: name of the dataset
    :param report_dir: directory for the HTML report; defaults to the
        location that used to be hard-coded here
    :return: file name (without directory) of the HTML report
    :raises ValueError: when the dataset is not stored for this user
    '''
    filename = os.path.join(report_dir, "%s_%s.html" % (self.username, dataset_name))
    if not os.path.exists(filename):
        model = self.mydb['dataset_model']
        query = dict(dataset_name=dataset_name, username=self.username)
        res = model.find_one(query)
        if res is None:
            # find_one() returns None on a miss; fail with a clear message
            # instead of a TypeError from indexing None.
            raise ValueError("dataset %r not found for user %r" % (dataset_name, self.username))
        df = pd.DataFrame(res["data"])
        report = pandas_profiling.ProfileReport(df)
        report.to_file(filename)
    return os.path.split(filename)[-1]
|
||||
|
||||
def delete(self, dataset_name):
    '''
    Delete a dataset by name.

    Removes the entry from the user's ``dataset`` list and deletes the
    matching records through ``self.DM``.

    :param dataset_name: name of the dataset to delete
    :return: True once both operations have been issued
    '''
    # Collection.update() was deprecated in PyMongo 3 and removed in
    # PyMongo 4; update_one() is the equivalent single-document form.
    self.user_collection.update_one(
        {"username": self.username},
        {"$pull": {"dataset": {"name": dataset_name}}},
    )
    self.DM.filter(username=self.username, dataset_name=dataset_name).delete()
    return True
|
||||
|
||||
|
||||
#
|
||||
if __name__=="__main__":
|
||||
pass
|
||||
if __name__ == "__main__":
|
||||
# path="../Datasets/day.csv"
|
||||
# dp=DatasetProcess()
|
||||
dp = DatasetProcess(username="lzh3")
|
||||
dp.generate_report("day_csv")
|
||||
# a=dp.get_dataset('aapl_csv')
|
||||
# print(a)
|
||||
# res=dp.get_dataset('not')
|
||||
@@ -122,5 +139,3 @@ if __name__=="__main__":
|
||||
# dp.upload(path)#将day.csv上传
|
||||
# a=dp.get_dataset("hour")
|
||||
# print(pd.DataFrame(a))
|
||||
|
||||
|
||||
|
||||
+43
-53
@@ -1,4 +1,5 @@
|
||||
import os
|
||||
|
||||
# from codes import MODEL_DICT
|
||||
|
||||
'''
|
||||
@@ -16,26 +17,8 @@ import os
|
||||
|
||||
- 主函数预先定义一个代码文件,相关参数通过占位符填充,填充的参数来源于前段输入,包括:主要包括特征列、目标列,文件名
|
||||
'''
|
||||
MODEL_DICT={
|
||||
'分类':{
|
||||
'朴素贝叶斯':'from sklearn.naive_bayes import GaussianNB',
|
||||
'决策树':'from sklearn.tree import DecisionTreeClassifier',
|
||||
'支持向量机':'from sklearn.svm import SVC',
|
||||
'逻辑回归': 'from sklearn.linear_model import LogisticRegression',
|
||||
'神经网络':'from sklearn.neural_network import MLPClassifier'
|
||||
},
|
||||
'回归':{
|
||||
'线性回归':'from sklearn.linear_model import LinearRegression',
|
||||
'决策树':'from sklearn.tree import DecisionTreeRegressor',
|
||||
'支持向量机':'from sklearn.svm import SVR',
|
||||
'神经网络':'from sklearn.neural_network import MLPRegressor'
|
||||
},
|
||||
'聚类':{
|
||||
'K-means':'from sklearn.cluster import KMeans'
|
||||
},
|
||||
'ROC曲线':'plot_ROC_curve.py',
|
||||
'混淆矩阵':'plot_confusion_matrix.py'
|
||||
}
|
||||
from codes.MODEL_DICT import MODEL_DICT
|
||||
|
||||
|
||||
class SetModel():
|
||||
'''
|
||||
@@ -50,9 +33,10 @@ class SetModel():
|
||||
}
|
||||
'''
|
||||
|
||||
def __init__(self, dataset_name,features,target,model_type,model_name,evaluate_methods=[]):
|
||||
def __init__(self, name, dataset_name, features, target, model_type, model_name, username='', evaluate_methods=[]):
|
||||
'''
|
||||
|
||||
:param name(str,):任务名称
|
||||
:param dataset_name(str,):数据集名称
|
||||
:param features(str of list):特征列
|
||||
:param target(str):目标
|
||||
@@ -60,16 +44,18 @@ class SetModel():
|
||||
:param model_name(str of list):模型
|
||||
:param evaluate_methods(str of list,非必填):模型评估方法
|
||||
'''
|
||||
self.code_files=os.path.join(os.path.abspath(''),'codes')
|
||||
self.dataset_name=dataset_name
|
||||
self.target=target
|
||||
self.features=features
|
||||
self.model_type=model_type
|
||||
self.model_name=model_name
|
||||
self.evaluate_methods=evaluate_methods
|
||||
self.generate=''
|
||||
self.code_files = os.path.join(os.path.abspath(''), 'codes')
|
||||
self.name = name
|
||||
self.dataset_name = dataset_name
|
||||
self.target = target
|
||||
self.features = features
|
||||
self.model_type = model_type
|
||||
self.model_name = model_name
|
||||
self.evaluate_methods = evaluate_methods
|
||||
self.username = username
|
||||
self.generate = ''
|
||||
|
||||
def clean_data(self,df,cols,op,standard=''):
|
||||
def clean_data(self, df, cols, op, standard=''):
|
||||
'''
|
||||
自动数据清洗
|
||||
df:
|
||||
@@ -79,51 +65,55 @@ class SetModel():
|
||||
if op == 'fillna':
|
||||
df.loc[:, cols].fillna()
|
||||
elif op == 'dropna':
|
||||
df.loc[:,cols].dropna()
|
||||
df.loc[:, cols].dropna()
|
||||
else:
|
||||
df.loc[:,cols].apply(op)
|
||||
df.loc[:, cols].apply(op)
|
||||
return df
|
||||
|
||||
def joint_code(self, code_path, encoding='utf-8'):
    '''
    Append the contents of a code template file to ``self.generate``.

    The file is read from ``self.code_files`` with ``encoding``; if it
    cannot be decoded that way it is read again as GBK. A newline is
    appended after the file contents.

    :param code_path: file name relative to ``self.code_files``
    :param encoding: encoding tried first
    '''
    path = os.path.join(self.code_files, code_path)
    try:
        # ``with`` closes the handle; the previous version leaked one open
        # file per call (two when the GBK fallback ran).
        with open(path, 'r', encoding=encoding) as f:
            content = f.read()
    except (UnicodeError, LookupError):
        # Retry only for decoding problems; a missing file fails the same
        # way under either encoding, so it is left to propagate.
        with open(path, 'r', encoding='gbk') as f:
            content = f.read()
    self.generate += content + '\n'
|
||||
|
||||
def get_code(self):
|
||||
#生成代码
|
||||
# 生成代码
|
||||
# 拼接导入的库
|
||||
self.joint_code('ImportPackages.py')
|
||||
for model in self.model_name:
|
||||
self.generate+='\n'+MODEL_DICT[self.model_type][model]+'\n'
|
||||
|
||||
# 拼接函数评估方法
|
||||
if len(self.evaluate_methods)!=0:
|
||||
for method in self.evaluate_methods:
|
||||
self.joint_code(MODEL_DICT[method])
|
||||
self.generate += '\n' + MODEL_DICT[self.model_type][model] + '\n'
|
||||
|
||||
# 拼接变量
|
||||
for model in self.model_name:
|
||||
sklearn_model = MODEL_DICT[self.model_type][model].split(' ')[-1] + '()'
|
||||
self.generate+='''
|
||||
self.generate += '''
|
||||
FILE_PATH='./{}'\n
|
||||
FEATURES={}\n
|
||||
TARGET='{}'\n
|
||||
MODEL={}\n
|
||||
'''.format(self.dataset_name.replace('_','.'), self.features, self.target, sklearn_model)
|
||||
#拼接主函数
|
||||
'''.format(self.dataset_name.replace('_', '.'), self.features, self.target, sklearn_model)
|
||||
# 拼接主函数
|
||||
self.joint_code('Main.py')
|
||||
# 拼接函数评估方法
|
||||
if len(self.evaluate_methods) != 0:
|
||||
for method in self.evaluate_methods:
|
||||
location = MODEL_DICT.get(method)
|
||||
if location:
|
||||
self.joint_code(location)
|
||||
|
||||
#生成代码文件
|
||||
with open(os.path.join(os.path.abspath(''),'temp/generate.py'),'w',encoding='utf-8') as f:
|
||||
# 生成代码文件
|
||||
filename = "generate_{}_{}.py".format(self.username, self.name)
|
||||
filepath = os.path.join(os.path.abspath(''), 'temp', filename)
|
||||
print("存放路径:", filepath)
|
||||
with open(filepath, 'w', encoding='utf-8') as f:
|
||||
f.write(self.generate)
|
||||
f.close()
|
||||
# 返回生成代码的文本
|
||||
return self.generate
|
||||
|
||||
|
||||
if __name__=='__main__':
|
||||
myModel=SetModel('day',['cnt','yr','weekday'],'season','分类',['决策树'],['混淆矩阵','ROC曲线'])
|
||||
myModel.get_code()
|
||||
if __name__ == '__main__':
|
||||
myModel = SetModel('day', ['cnt', 'yr', 'weekday'], 'season', '分类', ['决策树'], ['混淆矩阵', 'ROC曲线'])
|
||||
myModel.get_code()
|
||||
|
||||
@@ -0,0 +1,66 @@
|
||||
from ModelSelection.models import UserModel
|
||||
import traceback
|
||||
import pymongo
|
||||
from utils.EmailClient import EmailService
|
||||
|
||||
em = EmailService()
|
||||
class UserProcess():
    """User lookups against the MongoDB user collection."""

    def __init__(self, database="AML", collection="user_model"):
        self.client = pymongo.MongoClient(host="localhost", port=27017)
        self.mydb = self.client[database]
        self.user_collection = self.mydb[collection]

    @staticmethod
    def _is_plain_query(query_dict):
        """Return True for a non-empty dict of plain field/scalar pairs."""
        if not isinstance(query_dict, dict) or not query_dict:
            return False
        for key, value in query_dict.items():
            # Keys starting with "$" and dict/list values are MongoDB
            # operators; they must not be accepted from a client.
            if not isinstance(key, str) or key.startswith("$"):
                return False
            if not isinstance(value, (str, int, float, bool)):
                return False
        return True

    def check_exist(self, query_dict):
        """Return True when a user document matches ``query_dict``.

        ``query_dict`` originates from the request body, so only plain
        equality filters are forwarded; anything else (empty, not a dict,
        containing operators) is treated as "no match".
        """
        if not self._is_plain_query(query_dict):
            return False
        return self.user_collection.find_one(query_dict) is not None

    def login(self, username, password):
        """Return True when a user with this username and password exists.

        Both values must be strings: a dict such as ``{"$ne": ""}`` would
        otherwise be interpreted by MongoDB as an operator and match any
        password.
        """
        if not isinstance(username, str) or not isinstance(password, str):
            return False
        exist = self.user_collection.find_one({
            "username": username,
            "password": password
        })
        return exist is not None

    def check_code(self):
        pass
|
||||
|
||||
|
||||
|
||||
def Regist(infos, **params):
    '''
    Create a user once the e-mail verification code has been confirmed.

    :param infos: dict of user fields; ``email`` identifies the pending code
    :param params: keyword options; ``check_code`` is the code the user typed
    :return: True when the code was accepted and the user was created,
        False when the code was rejected or any step raised
    '''
    try:
        code = params.get("check_code")
        email = infos.get("email")
        if em.check_input(code, email):
            UserModel.objects().create(**infos)
            return True
        return False
    except Exception:
        # ``except Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        traceback.print_exc()
        return False
|
||||
|
||||
def sendCode(email):
    """Send a verification code to ``email``.

    :return: True when ``em.send_email`` returned a truthy record (which is
        also printed), otherwise False.
    """
    record = em.send_email(email)
    if not record:
        return False
    print("发送成功:", record)
    return True
|
||||
|
||||
if __name__ == '__main__':
|
||||
up=UserProcess()
|
||||
a=up.check_exist({"username":"admin","password":"admin"})
|
||||
# if sendCode("526494747@qq.com"):
|
||||
pass
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user