"""Single-feature linear regression on the Boston housing data set.

The script downloads the UCI ``housing.data`` file, fits a one-neuron Keras
model that predicts MEDV (median home value) from RM (rooms per dwelling),
and plots the raw data, the MAE training history and the test predictions.
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# ---------------------------------------------------------------------------
# Load the data
# ---------------------------------------------------------------------------
dataset_path = keras.utils.get_file(
    "housing.data",
    "https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data")
column_names = ['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX',
                'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']
# The file is space-padded fixed-width text: split on spaces and let
# skipinitialspace collapse the runs of padding between columns.
raw_dataset = pd.read_csv(dataset_path, names=column_names,
                          na_values="?", comment='\t',
                          sep=" ", skipinitialspace=True)
dataset = raw_dataset.copy()

# Split the data set into a training set and a test set.
# p is the fraction of the rows used for training.
p = 0.8
trainDataset = dataset.sample(frac=p, random_state=0)
testDataset = dataset.drop(trainDataset.index)

# ---------------------------------------------------------------------------
# Inspect the single feature against the target
# ---------------------------------------------------------------------------
fig, ax = plt.subplots()
x = trainDataset['RM']
y = trainDataset['MEDV']
ax.scatter(x, y, edgecolors=(0, 0, 0))
ax.set_xlabel('RM')
ax.set_ylabel('MEDV')
plt.show()

trainInput = trainDataset['RM']
trainTarget = trainDataset['MEDV']
testInput = testDataset['RM']
testTarget = testDataset['MEDV']

# ---------------------------------------------------------------------------
# Model: one dense unit with a bias, i.e. y = w * x + b
# ---------------------------------------------------------------------------
model = keras.Sequential([
    layers.Dense(1, use_bias=True, input_shape=(1,))
])

optimizer = tf.keras.optimizers.Adam(
    learning_rate=0.01, beta_1=0.9, beta_2=0.99, epsilon=1e-05, amsgrad=False,
    name='Adam')
model.compile(loss='mse', optimizer=optimizer, metrics=['mae', 'mse'])

# Stop when val_loss has not improved by at least min_delta for this many
# consecutive epochs.
n_idle_epochs = 100
earlyStopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=n_idle_epochs, min_delta=0.01)


class NEPOCHLogger(tf.keras.callbacks.Callback):
    """Print loss/MAE/MSE for training and validation every N epochs.

    Args:
        per_epoch: Number of epochs between two printed log lines.
    """

    def __init__(self, per_epoch=100):
        # Let the Keras base class set up its own bookkeeping attributes.
        super().__init__()
        self.seen = 0
        self.per_epoch = per_epoch

    def on_epoch_end(self, epoch, logs=None):
        """Print the metrics in ``logs`` when ``epoch`` is a multiple of ``per_epoch``."""
        if epoch % self.per_epoch != 0:
            return
        # Keras may pass logs=None; missing metrics are reported as nan
        # instead of raising.
        logs = logs or {}
        nan = float('nan')
        print('Epoch {}, loss {:.2f}, val_loss {:.2f}, '
              'mae {:.2f}, val_mae {:.2f}, mse {:.2f}, val_mse {:.2f}'
              .format(epoch,
                      logs.get('loss', nan), logs.get('val_loss', nan),
                      logs.get('mae', nan), logs.get('val_mae', nan),
                      logs.get('mse', nan), logs.get('val_mse', nan)))


log_display = NEPOCHLogger(per_epoch=100)
n_epochs = 2000
history = model.fit(
    trainInput, trainTarget, batch_size=256,
    epochs=n_epochs, validation_split=0.1, verbose=0,
    callbacks=[earlyStopping, log_display])

# ---------------------------------------------------------------------------
# Plot the training and validation MAE per epoch
# ---------------------------------------------------------------------------
mae = np.asarray(history.history['mae'])
val_mae = np.asarray(history.history['val_mae'])
num_values = len(mae)
values = np.zeros((num_values, 2), dtype=float)
values[:, 0] = mae
values[:, 1] = val_mae
steps = pd.RangeIndex(start=0, stop=num_values)
data = pd.DataFrame(values, steps, columns=["mae", "val_mae"])
sns.set(style="whitegrid")
sns.lineplot(data=data, palette="tab10", linewidth=2.5)
# Show (and thereby finish) this figure so the next plot gets its own.
plt.show()

# ---------------------------------------------------------------------------
# Compare the predictions with the true targets on the test set
# ---------------------------------------------------------------------------
predictions = model.predict(testInput).flatten()
a = plt.axes(aspect='equal')
# True values go on the x axis and predictions on the y axis, matching the labels.
plt.scatter(testTarget, predictions, edgecolors=(0, 0, 0))
plt.xlabel('True Values')
plt.ylabel('Predictions')
lims = [0, 50]
plt.xlim(lims)
plt.ylim(lims)
# The diagonal marks perfect predictions.
_ = plt.plot(lims, lims)
plt.show()