From a395c9c6e029b26350f30274d1772f062b16fbf2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E5=BC=A0=E5=8D=93=E7=AB=8B?=
<13190677+zhang-zhuoli@user.noreply.gitee.com>
Date: Sat, 15 Jul 2023 12:05:21 +0000
Subject: [PATCH] =?UTF-8?q?=E9=9A=8F=E6=9C=BA=E6=A3=AE=E6=9E=97=E5=9B=9E?=
=?UTF-8?q?=E5=BD=92?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Signed-off-by: 张卓立 <13190677+zhang-zhuoli@user.noreply.gitee.com>
---
.../rf_reg.ipynb | 278 ++++++++++++++++++
1 file changed, 278 insertions(+)
create mode 100644 共享民宿平台担保交易房子评分的影响研究/rf_reg.ipynb
diff --git a/共享民宿平台担保交易房子评分的影响研究/rf_reg.ipynb b/共享民宿平台担保交易房子评分的影响研究/rf_reg.ipynb
new file mode 100644
index 0000000..8c130dd
--- /dev/null
+++ b/共享民宿平台担保交易房子评分的影响研究/rf_reg.ipynb
@@ -0,0 +1,278 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "67537bcc-9ece-42d8-a6a7-924966604450",
+ "metadata": {},
+ "source": [
+ "# 随机森林回归"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "8f588675-9e05-45c7-9203-15c52f7ddd05",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "import pandas as pd\n",
+ "from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor\n",
+ "from sklearn.metrics import mean_squared_error\n",
+ "from sklearn.preprocessing import StandardScaler\n",
+ "from sklearn.model_selection import train_test_split"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "0a02061e-296a-4f6a-91af-c7fa27d46f17",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " host_response_rate | \n",
+ " host_acceptance_rate | \n",
+ " accommodates | \n",
+ " price | \n",
+ " number_of_reviews | \n",
+ " review_scores_rating | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 1.00 | \n",
+ " 0.33 | \n",
+ " 2.0 | \n",
+ " 120.0 | \n",
+ " 90.0 | \n",
+ " 4.50 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1.00 | \n",
+ " 0.98 | \n",
+ " 2.0 | \n",
+ " 90.0 | \n",
+ " 351.0 | \n",
+ " 4.58 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 1.00 | \n",
+ " 0.98 | \n",
+ " 2.0 | \n",
+ " 66.0 | \n",
+ " 67.0 | \n",
+ " 4.52 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 1.00 | \n",
+ " 0.98 | \n",
+ " 1.0 | \n",
+ " 33.0 | \n",
+ " 297.0 | \n",
+ " 4.70 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 1.00 | \n",
+ " 1.00 | \n",
+ " 2.0 | \n",
+ " 45.0 | \n",
+ " 42.0 | \n",
+ " 4.98 | \n",
+ "
\n",
+ " \n",
+ " | ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " | 203252 | \n",
+ " 1.00 | \n",
+ " 0.93 | \n",
+ " 4.0 | \n",
+ " 152.0 | \n",
+ " 1.0 | \n",
+ " 4.00 | \n",
+ "
\n",
+ " \n",
+ " | 203253 | \n",
+ " 1.00 | \n",
+ " 0.97 | \n",
+ " 2.0 | \n",
+ " 45.0 | \n",
+ " 1.0 | \n",
+ " 3.00 | \n",
+ "
\n",
+ " \n",
+ " | 203254 | \n",
+ " 1.00 | \n",
+ " 0.97 | \n",
+ " 2.0 | \n",
+ " 40.0 | \n",
+ " 1.0 | \n",
+ " 1.00 | \n",
+ "
\n",
+ " \n",
+ " | 203276 | \n",
+ " 0.99 | \n",
+ " 0.99 | \n",
+ " 2.0 | \n",
+ " 43.0 | \n",
+ " 1.0 | \n",
+ " 5.00 | \n",
+ "
\n",
+ " \n",
+ " | 203308 | \n",
+ " 1.00 | \n",
+ " 1.00 | \n",
+ " 3.0 | \n",
+ " 110.0 | \n",
+ " 1.0 | \n",
+ " 5.00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
134835 rows × 6 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " host_response_rate host_acceptance_rate accommodates price \\\n",
+ "0 1.00 0.33 2.0 120.0 \n",
+ "1 1.00 0.98 2.0 90.0 \n",
+ "2 1.00 0.98 2.0 66.0 \n",
+ "3 1.00 0.98 1.0 33.0 \n",
+ "5 1.00 1.00 2.0 45.0 \n",
+ "... ... ... ... ... \n",
+ "203252 1.00 0.93 4.0 152.0 \n",
+ "203253 1.00 0.97 2.0 45.0 \n",
+ "203254 1.00 0.97 2.0 40.0 \n",
+ "203276 0.99 0.99 2.0 43.0 \n",
+ "203308 1.00 1.00 3.0 110.0 \n",
+ "\n",
+ " number_of_reviews review_scores_rating \n",
+ "0 90.0 4.50 \n",
+ "1 351.0 4.58 \n",
+ "2 67.0 4.52 \n",
+ "3 297.0 4.70 \n",
+ "5 42.0 4.98 \n",
+ "... ... ... \n",
+ "203252 1.0 4.00 \n",
+ "203253 1.0 3.00 \n",
+ "203254 1.0 1.00 \n",
+ "203276 1.0 5.00 \n",
+ "203308 1.0 5.00 \n",
+ "\n",
+ "[134835 rows x 6 columns]"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Columns to load from the listings CSV.\n",
+ "variables = ['number_of_reviews', 'price', 'accommodates',\n",
+ "             'host_response_rate', 'host_acceptance_rate', 'review_scores_rating']\n",
+ "df = pd.read_csv('../data/2022-01(US_25).csv', usecols=variables)\n",
+ "# Remove '$' and ',' in one pass (raw-string regex) so price can be cast to float.\n",
+ "df['price'] = df['price'].replace(r'[$,]', '', regex=True).astype(float)\n",
+ "# Remove '%' and scale by 0.01 so both rate columns become fractions.\n",
+ "rate_cols = ['host_response_rate', 'host_acceptance_rate']\n",
+ "df[rate_cols] = df[rate_cols].replace('%', '', regex=True).astype(float)*0.01\n",
+ "# Cast every column to float32 and drop rows with NaN in any of them (index is kept).\n",
+ "df = df.astype(np.float32).dropna(subset=variables)\n",
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "35569391-e849-4a5e-adf5-25dfd849b2a0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Fixed train/test split; the target column is selected by name, not position.\n",
+ "target = df['review_scores_rating'].values\n",
+ "info = df.drop(columns='review_scores_rating').values\n",
+ "info_train, info_test, target_train, target_test = train_test_split(\n",
+ "    info, target, test_size=0.3, shuffle=True, random_state=420)\n",
+ "# Standardize: fit the scaler on the training split only, then apply it to both.\n",
+ "stdscaler = StandardScaler()\n",
+ "info_train = stdscaler.fit_transform(info_train)\n",
+ "info_test = stdscaler.transform(info_test)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "7ca33f2e-bcc8-4234-a27b-fa7e1df04030",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "MSE:0.2333\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Random forest regression: fit on the training split, report MSE on the test split.\n",
+ "rf = RandomForestRegressor(random_state=0, n_estimators=100).fit(info_train, target_train)\n",
+ "target_pred = rf.predict(info_test)\n",
+ "mse = mean_squared_error(target_test, target_pred)\n",
+ "print(\"MSE:%.4f\" % mse)"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}