# 数据处理

1. # Load the data
2. import pandas as pd
4. credits_data.shape# Inspect the dimensions of the data. NOTE(review): credits_data is not assigned in the visible lines (list item 3 is absent) - confirm the load step.

1. print('-------------------------------------统计量描述-------------------------------------')
2. explore = credits_data.describe(percentiles=[], include='all').T # percentiles selects which percentiles to report (an empty list here); .T transposes so each original column becomes a row
3. explore['null'] = len(credits_data) - explore['count'] # describe() reports the non-null count, so the null count is derived as total rows minus count
5. explore = explore[['null', 'max', 'min','mean']] # keep only the null count, max, min and mean columns
6. explore.columns = [u'空值数', u'最大值', u'最小值',u'平均值']  # rename the headers (null count, max, min, mean)
7. # explore.to_csv('data/项目一/credits_data统计量描述.csv')  # save the result
8. print('--------------------------------------空值统计--------------------------------------')
9. print(credits_data.isnull().sum()) # number of null values per column

1. # Load the data from the tmdb_5000_movies table
3. data_movies.head(2) # preview the first two rows. NOTE(review): data_movies is not assigned in the visible lines (list item 2 is absent) - confirm the load step.

# 数据划分

1. x=data_L.drop("vote_average",axis=1) # independent variables (features): every column except vote_average
2. y=data_L["vote_average"]# dependent variable (target): the vote_average column

1. from sklearn.model_selection import train_test_split
2. # Split the dataset: 80% training set, 20% test set
3. x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.2,random_state=42)

# 数据建模

1. import numpy as np
2. import matplotlib.pyplot as plt
3. from sklearn.ensemble import RandomForestRegressor
4. from sklearn.model_selection import train_test_split
5. from sklearn.multioutput import MultiOutputRegressor
6. # Define the model
7. regr_rf = RandomForestRegressor() # default hyperparameters; NOTE(review): no random_state is passed, so the score may differ between runs
8. # Fit the model on the training data
9. regr_rf.fit(x_train, y_train)
10. # Use the fitted model to predict on the test set
11. y_rf = regr_rf.predict(x_test)
12. # Evaluate: print the model's score on the test set (R^2 for scikit-learn regressors)
13. print(regr_rf.score(x_test, y_test))
14. # y_rf.round(1)

# 模型评估

1. import numpy as np
2. import matplotlib.pyplot as plt
3. from sklearn.ensemble import RandomForestRegressor
4. from sklearn.model_selection import train_test_split
5. from sklearn.multioutput import MultiOutputRegressor
6. from sklearn.model_selection import train_test_split # NOTE(review): repeats the import on list item 4
7. x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.2,random_state=93) # 80/20 split with random_state=93
8. # Define the model: 150 decision trees, maximum tree depth 10
9. regr_rf = RandomForestRegressor(n_estimators=150,max_depth=10,random_state=0)
10.
11. # Fit the model on the training data
12. regr_rf.fit(x_train, y_train)
13. # Use the fitted model to predict on the test set
14. y_rf = regr_rf.predict(x_test)
15. # Evaluate: print the model's score on the test set (R^2 for scikit-learn regressors)
16. print(regr_rf.score(x_test, y_test))

# 结果预测

