From 77ea09c9a8be356bb9a442073822257b37f0b6c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=9C=E4=BD=B3=E7=92=90?= <13190718+du-jialulu@user.noreply.gitee.com> Date: Fri, 14 Jul 2023 02:19:28 +0000 Subject: [PATCH] =?UTF-8?q?=E6=9D=9C=E4=BD=B3=E7=92=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 杜佳璐 <13190718+du-jialulu@user.noreply.gitee.com> --- .../第5组实战/实例_数据处理_泰坦尼克号幸存.ipynb | 2786 +++++++++++++++++ 1 file changed, 2786 insertions(+) create mode 100644 2、幸福感数据分析/第5组-杜佳璐/第5组实战/实例_数据处理_泰坦尼克号幸存.ipynb diff --git a/2、幸福感数据分析/第5组-杜佳璐/第5组实战/实例_数据处理_泰坦尼克号幸存.ipynb b/2、幸福感数据分析/第5组-杜佳璐/第5组实战/实例_数据处理_泰坦尼克号幸存.ipynb new file mode 100644 index 0000000..e375b51 --- /dev/null +++ b/2、幸福感数据分析/第5组-杜佳璐/第5组实战/实例_数据处理_泰坦尼克号幸存.ipynb @@ -0,0 +1,2786 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import itertools\n", + "from scipy import stats,integrate\n", + "plt.rcParams['font.sans-serif']=['SimHei'] #用来正常显示中文标签\n", + "plt.rcParams['axes.unicode_minus']=False #用来正常显示负号" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "pycharm": { + "name": "#%% md\n" + } + }, + "source": [ + "## 1 数据预处理" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "data": { + "text/html": [ + "
| \n", + " | PassengerId | \n", + "Survived | \n", + "Pclass | \n", + "Name | \n", + "Sex | \n", + "Age | \n", + "SibSp | \n", + "Parch | \n", + "Ticket | \n", + "Fare | \n", + "Cabin | \n", + "Embarked | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "1 | \n", + "0 | \n", + "3 | \n", + "Braund, Mr. Owen Harris | \n", + "male | \n", + "22.0 | \n", + "1 | \n", + "0 | \n", + "A/5 21171 | \n", + "7.2500 | \n", + "NaN | \n", + "S | \n", + "
| 1 | \n", + "2 | \n", + "1 | \n", + "1 | \n", + "Cumings, Mrs. John Bradley (Florence Briggs Th... | \n", + "female | \n", + "38.0 | \n", + "1 | \n", + "0 | \n", + "PC 17599 | \n", + "71.2833 | \n", + "C85 | \n", + "C | \n", + "
| 2 | \n", + "3 | \n", + "1 | \n", + "3 | \n", + "Heikkinen, Miss. Laina | \n", + "female | \n", + "26.0 | \n", + "0 | \n", + "0 | \n", + "STON/O2. 3101282 | \n", + "7.9250 | \n", + "NaN | \n", + "S | \n", + "
| 3 | \n", + "4 | \n", + "1 | \n", + "1 | \n", + "Futrelle, Mrs. Jacques Heath (Lily May Peel) | \n", + "female | \n", + "35.0 | \n", + "1 | \n", + "0 | \n", + "113803 | \n", + "53.1000 | \n", + "C123 | \n", + "S | \n", + "
| 4 | \n", + "5 | \n", + "0 | \n", + "3 | \n", + "Allen, Mr. William Henry | \n", + "male | \n", + "35.0 | \n", + "0 | \n", + "0 | \n", + "373450 | \n", + "8.0500 | \n", + "NaN | \n", + "S | \n", + "
| \n", + " | PassengerId | \n", + "Survived | \n", + "Pclass | \n", + "Age | \n", + "SibSp | \n", + "Parch | \n", + "Fare | \n", + "
|---|---|---|---|---|---|---|---|
| count | \n", + "891.000000 | \n", + "891.000000 | \n", + "891.000000 | \n", + "714.000000 | \n", + "891.000000 | \n", + "891.000000 | \n", + "891.000000 | \n", + "
| mean | \n", + "446.000000 | \n", + "0.383838 | \n", + "2.308642 | \n", + "29.699118 | \n", + "0.523008 | \n", + "0.381594 | \n", + "32.204208 | \n", + "
| std | \n", + "257.353842 | \n", + "0.486592 | \n", + "0.836071 | \n", + "14.526497 | \n", + "1.102743 | \n", + "0.806057 | \n", + "49.693429 | \n", + "
| min | \n", + "1.000000 | \n", + "0.000000 | \n", + "1.000000 | \n", + "0.420000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
| 25% | \n", + "223.500000 | \n", + "0.000000 | \n", + "2.000000 | \n", + "20.125000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "7.910400 | \n", + "
| 50% | \n", + "446.000000 | \n", + "0.000000 | \n", + "3.000000 | \n", + "28.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "14.454200 | \n", + "
| 75% | \n", + "668.500000 | \n", + "1.000000 | \n", + "3.000000 | \n", + "38.000000 | \n", + "1.000000 | \n", + "0.000000 | \n", + "31.000000 | \n", + "
| max | \n", + "891.000000 | \n", + "1.000000 | \n", + "3.000000 | \n", + "80.000000 | \n", + "8.000000 | \n", + "6.000000 | \n", + "512.329200 | \n", + "
| \n", + " | Survived | \n", + "Pclass | \n", + "Sex | \n", + "Age | \n", + "SibSp | \n", + "Parch | \n", + "Ticket | \n", + "Fare | \n", + "Cabin | \n", + "Embarked | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "0 | \n", + "3 | \n", + "male | \n", + "22.0 | \n", + "1 | \n", + "0 | \n", + "A/5 21171 | \n", + "7.2500 | \n", + "NaN | \n", + "S | \n", + "
| 1 | \n", + "1 | \n", + "1 | \n", + "female | \n", + "38.0 | \n", + "1 | \n", + "0 | \n", + "PC 17599 | \n", + "71.2833 | \n", + "C85 | \n", + "C | \n", + "
| 2 | \n", + "1 | \n", + "3 | \n", + "female | \n", + "26.0 | \n", + "0 | \n", + "0 | \n", + "STON/O2. 3101282 | \n", + "7.9250 | \n", + "NaN | \n", + "S | \n", + "
| 3 | \n", + "1 | \n", + "1 | \n", + "female | \n", + "35.0 | \n", + "1 | \n", + "0 | \n", + "113803 | \n", + "53.1000 | \n", + "C123 | \n", + "S | \n", + "
| 4 | \n", + "0 | \n", + "3 | \n", + "male | \n", + "35.0 | \n", + "0 | \n", + "0 | \n", + "373450 | \n", + "8.0500 | \n", + "NaN | \n", + "S | \n", + "
| \n", + " | Embarked | \n", + "Survived | \n", + "
|---|---|---|
| 0 | \n", + "C | \n", + "0.553571 | \n", + "
| 1 | \n", + "Q | \n", + "0.389610 | \n", + "
| 2 | \n", + "S | \n", + "0.336957 | \n", + "
| \n", + " | AgeLevel | \n", + "Survived | \n", + "
|---|---|---|
| 0 | \n", + "(0.34, 20.315] | \n", + "0.428571 | \n", + "
| 1 | \n", + "(20.315, 40.21] | \n", + "0.369732 | \n", + "
| 2 | \n", + "(40.21, 60.105] | \n", + "0.398601 | \n", + "
| 3 | \n", + "(60.105, 80.0] | \n", + "0.217391 | \n", + "
| \n", + " | Survived | \n", + "Pclass | \n", + "Sex | \n", + "Age | \n", + "SibSp | \n", + "Parch | \n", + "Ticket | \n", + "Fare | \n", + "Cabin | \n", + "Embarked | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "0 | \n", + "3 | \n", + "male | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "A/5 21171 | \n", + "7.2500 | \n", + "NaN | \n", + "S | \n", + "
| 1 | \n", + "1 | \n", + "1 | \n", + "female | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "PC 17599 | \n", + "71.2833 | \n", + "C85 | \n", + "C | \n", + "
| 2 | \n", + "1 | \n", + "3 | \n", + "female | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "STON/O2. 3101282 | \n", + "7.9250 | \n", + "NaN | \n", + "S | \n", + "
| 3 | \n", + "1 | \n", + "1 | \n", + "female | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "113803 | \n", + "53.1000 | \n", + "C123 | \n", + "S | \n", + "
| 4 | \n", + "0 | \n", + "3 | \n", + "male | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "373450 | \n", + "8.0500 | \n", + "NaN | \n", + "S | \n", + "
| \n", + " | Survived | \n", + "Pclass | \n", + "Sex | \n", + "Age | \n", + "Ticket | \n", + "Fare | \n", + "Cabin | \n", + "Embarked | \n", + "IsAlone | \n", + "
|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "0 | \n", + "3 | \n", + "male | \n", + "1 | \n", + "A/5 21171 | \n", + "7.2500 | \n", + "NaN | \n", + "S | \n", + "1 | \n", + "
| 1 | \n", + "1 | \n", + "1 | \n", + "female | \n", + "1 | \n", + "PC 17599 | \n", + "71.2833 | \n", + "C85 | \n", + "C | \n", + "1 | \n", + "
| 2 | \n", + "1 | \n", + "3 | \n", + "female | \n", + "1 | \n", + "STON/O2. 3101282 | \n", + "7.9250 | \n", + "NaN | \n", + "S | \n", + "0 | \n", + "
| 3 | \n", + "1 | \n", + "1 | \n", + "female | \n", + "1 | \n", + "113803 | \n", + "53.1000 | \n", + "C123 | \n", + "S | \n", + "1 | \n", + "
| 4 | \n", + "0 | \n", + "3 | \n", + "male | \n", + "1 | \n", + "373450 | \n", + "8.0500 | \n", + "NaN | \n", + "S | \n", + "0 | \n", + "
| \n", + " | FareLevel | \n", + "Survived | \n", + "
|---|---|---|
| 0 | \n", + "(-0.001, 7.91] | \n", + "0.197309 | \n", + "
| 1 | \n", + "(7.91, 14.454] | \n", + "0.303571 | \n", + "
| 2 | \n", + "(14.454, 31.0] | \n", + "0.454955 | \n", + "
| 3 | \n", + "(31.0, 512.329] | \n", + "0.581081 | \n", + "
| \n", + " | Survived | \n", + "Pclass | \n", + "Sex | \n", + "Age | \n", + "Ticket | \n", + "Fare | \n", + "Cabin | \n", + "Embarked | \n", + "IsAlone | \n", + "
|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "0 | \n", + "3 | \n", + "male | \n", + "1 | \n", + "A/5 21171 | \n", + "0 | \n", + "NaN | \n", + "S | \n", + "1 | \n", + "
| 1 | \n", + "1 | \n", + "1 | \n", + "female | \n", + "1 | \n", + "PC 17599 | \n", + "3 | \n", + "C85 | \n", + "C | \n", + "1 | \n", + "
| 2 | \n", + "1 | \n", + "3 | \n", + "female | \n", + "1 | \n", + "STON/O2. 3101282 | \n", + "1 | \n", + "NaN | \n", + "S | \n", + "0 | \n", + "
| 3 | \n", + "1 | \n", + "1 | \n", + "female | \n", + "1 | \n", + "113803 | \n", + "3 | \n", + "C123 | \n", + "S | \n", + "1 | \n", + "
| 4 | \n", + "0 | \n", + "3 | \n", + "male | \n", + "1 | \n", + "373450 | \n", + "1 | \n", + "NaN | \n", + "S | \n", + "0 | \n", + "
| \n", + " | Survived | \n", + "男性 | \n", + "女性 | \n", + "港口S | \n", + "港口C | \n", + "港口Q | \n", + "一等舱 | \n", + "二等舱 | \n", + "三等舱 | \n", + "孩子 | \n", + "少年 | \n", + "中年 | \n", + "老人 | \n", + "便宜票价 | \n", + "普通票价 | \n", + "高级票价 | \n", + "豪华票价 | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "
| 1 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "
| 2 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "
| 3 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "
| 4 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "0 | \n", + "1 | \n", + "0 | \n", + "0 | \n", + "
| \n", + " | Survived | \n", + "男性 | \n", + "女性 | \n", + "港口S | \n", + "港口C | \n", + "港口Q | \n", + "一等舱 | \n", + "二等舱 | \n", + "三等舱 | \n", + "孩子 | \n", + "少年 | \n", + "中年 | \n", + "老人 | \n", + "便宜票价 | \n", + "普通票价 | \n", + "高级票价 | \n", + "豪华票价 | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | \n", + "891.000000 | \n", + "891.000000 | \n", + "891.000000 | \n", + "891.000000 | \n", + "891.000000 | \n", + "891.000000 | \n", + "891.000000 | \n", + "891.000000 | \n", + "891.000000 | \n", + "891.000000 | \n", + "891.000000 | \n", + "891.000000 | \n", + "891.000000 | \n", + "891.000000 | \n", + "891.000000 | \n", + "891.000000 | \n", + "891.000000 | \n", + "
| mean | \n", + "0.383838 | \n", + "0.352413 | \n", + "0.647587 | \n", + "0.188552 | \n", + "0.086420 | \n", + "0.722783 | \n", + "0.242424 | \n", + "0.206510 | \n", + "0.551066 | \n", + "0.227834 | \n", + "0.585859 | \n", + "0.160494 | \n", + "0.025814 | \n", + "0.250281 | \n", + "0.243547 | \n", + "0.257015 | \n", + "0.249158 | \n", + "
| std | \n", + "0.486592 | \n", + "0.477990 | \n", + "0.477990 | \n", + "0.391372 | \n", + "0.281141 | \n", + "0.447876 | \n", + "0.428790 | \n", + "0.405028 | \n", + "0.497665 | \n", + "0.419670 | \n", + "0.492850 | \n", + "0.367270 | \n", + "0.158668 | \n", + "0.433418 | \n", + "0.429463 | \n", + "0.437233 | \n", + "0.432769 | \n", + "
| min | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
| 25% | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
| 50% | \n", + "0.000000 | \n", + "0.000000 | \n", + "1.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "1.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "1.000000 | \n", + "0.000000 | \n", + "1.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "
| 75% | \n", + "1.000000 | \n", + "1.000000 | \n", + "1.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "1.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "1.000000 | \n", + "0.000000 | \n", + "1.000000 | \n", + "0.000000 | \n", + "0.000000 | \n", + "0.500000 | \n", + "0.000000 | \n", + "1.000000 | \n", + "0.000000 | \n", + "
| max | \n", + "1.000000 | \n", + "1.000000 | \n", + "1.000000 | \n", + "1.000000 | \n", + "1.000000 | \n", + "1.000000 | \n", + "1.000000 | \n", + "1.000000 | \n", + "1.000000 | \n", + "1.000000 | \n", + "1.000000 | \n", + "1.000000 | \n", + "1.000000 | \n", + "1.000000 | \n", + "1.000000 | \n", + "1.000000 | \n", + "1.000000 | \n", + "
DecisionTreeClassifier(criterion='entropy', splitter='random')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
DecisionTreeClassifier(criterion='entropy', splitter='random')
MLPClassifier(hidden_layer_sizes=(6, 6, 6), max_iter=1000, solver='lbfgs')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
MLPClassifier(hidden_layer_sizes=(6, 6, 6), max_iter=1000, solver='lbfgs')
SVC(C=100, gamma=1)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
SVC(C=100, gamma=1)