假设有CSV文件(部分):suzhou.csv
要对其进行回归分析并输出图像:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.tree import DecisionTreeRegressor

# Load the first six columns of the CSV as integers, skipping the header row.
data = np.genfromtxt(
    "/suzhou.csv",
    delimiter=",",
    dtype=int,
    skip_header=1,
    usecols=np.arange(0, 6),
)

# Column 0 is used as the single feature and column 4 as the target.
# NOTE(review): judging by the axis labels below, these are presumably the
# year and the gross industrial output -- confirm against the CSV header.
X = data[:, [0]]  # 2-D (n_samples, 1), as scikit-learn expects for features
y = data[:, 4]    # 1-D (n_samples,), the documented single-output target shape

# Fit a shallow and a deeper tree to compare under- and over-fitting.
# `presort` is intentionally not passed: it was deprecated in scikit-learn
# 0.22 and removed in 0.24, where passing it raises TypeError.
regr_1 = DecisionTreeRegressor(max_depth=2)
regr_2 = DecisionTreeRegressor(max_depth=5)
regr_1.fit(X, y)
regr_2.fit(X, y)

# Predict once per year from 2007 to 2017 inclusive.  The earlier form
# (step 0.9 followed by a cast to int) produced the same years but with
# 2007 and 2016 duplicated.
X_test = np.arange(2007, 2018)[:, np.newaxis]
y_1 = regr_1.predict(X_test)
y_2 = regr_2.predict(X_test)

# Plot the raw data together with both fitted curves.
x_axis = range(2007, 2018, 2)
plt.figure()
plt.scatter(X, y, s=20, edgecolor="black", c="darkorange", label="data")
plt.plot(X_test, y_1, color="cornflowerblue",
         label="Decision Tree Depth=2", linewidth=2)
plt.plot(X_test, y_2, color="yellowgreen",
         label="Decision Tree Depth=5", linewidth=2)
plt.xticks(x_axis)
plt.xlabel("Year")
plt.ylabel("Gross Industrial Output")
plt.title("Total Industrial Output Value of Enterprises in Suzhou")
plt.legend()
plt.show()
输出图像: