决策树(1)

简介: 决策树(1)
import java.util.HashMap;
import java.util.Map;
import scala.Tuple2;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.mllib.regression.LabeledPoint;
import org.apache.spark.mllib.tree.DecisionTree;
import org.apache.spark.mllib.tree.model.DecisionTreeModel;
import org.apache.spark.mllib.util.MLUtils;
public class DecisionTreeRegression{
    public static void main(String[] args) {
        // TODO Auto-generated method stub
      SparkConf sparkConf = new SparkConf(). setAppName ("JavaDecisionTreeClassificationExample");
      sparkConf . setMaster("local[2]");
      JavaSparkContext jsc = new JavaSparkContext (sparkConf);
      // Load and parse the data file.
      String datapath =
      "file:///home/gyq/下载/spark-2.3.2-bin-hadoop2.7/data/mllib/sample_libsvm_data.txt";
      JavaRDD<LabeledPoint> data = MLUtils. loadLibSVMFile(jsc.sc(), datapath).toJavaRDD() ;
      // Split the data into training and test sets (30% held out for testing)
      JavaRDD<LabeledPoint>[] splits = data. randomSplit(new double[]{0.7, 0.3});
      JavaRDD<LabeledPoint> trainingData = splits[0];
      JavaRDD<LabeledPoint> testData = splits[1] ;
      // Set parameters.
      // Empty categoricalFeaturesInfo indicates all features are cont inuous .
      Integer numClasses = 2; //类别数量
      Map<Integer, Integer> categoricalFeaturesInfo = new HashMap<Integer, Integer>() ;
      /*衡量分类的质量。 支持的标准有"gini" ,代表的是Gini impurity(不纯度,即无序程度)与“entropy"代表的是
      information gain(信息增益)
      */
      String impurity = "gini";
      Integer maxDepth = 5; // 最大深度
      Integer maxBins = 32; // 最大划分数
      // Train a DecisionTree model for classification.
      final DecisionTreeModel model = DecisionTree . trainClassifier(trainingData,
      numClasses , categoricalFeaturesInfo, impurity, maxDepth, maxBins);
      // Evaluate model on test instances and compute test error
      JavaPairRDD<Double,Double> predictionAndLabel =
      testData.mapToPair(new PairFunction<LabeledPoint, Double, Double>() {
      public Tuple2<Double,Double> call(LabeledPoint p) {
      return new Tuple2<Double, Double> (model. predict(p. features()),p.label());
      }
      });
      Double testErr =1.0
      * predictionAndLabel. filter(new Function<Tuple2<Double,Double>, Boolean>()
      {
      public Boolean call(Tuple2<Double,Double> pl) {
      return !pl._1(). equals(pl._2());
      }
      }). count() / testData . count();
      System. out . println("Test Error: "+ testErr);
      System. out . println("Learned classification tree model:\n" + model. toDebugString());
    }
    }

相关文章
VUE.初始化项目报错缺少core-js
VUE.初始化项目报错缺少core-js
282 0
|
应用服务中间件 Apache
springmvc中报错Request processing failed;
springmvc中报错Request processing failed;
|
JavaScript 关系型数据库 MySQL
fastadmin 部署phpstudy,初步学习fastadmin,保姆级详细讲解
fastadmin 部署phpstudy,初步学习fastadmin,保姆级详细讲解
996 0
|
资源调度 前端开发 JavaScript
React 的antd-mobile 组件库,嵌套路由
React 的antd-mobile 组件库,嵌套路由
509 0
|
JSON 数据格式
appium driver install uiautomator2 安装失败
appium driver install uiautomator2 安装失败
592 6
|
数据处理 Python
【Python】已解决:SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame
【Python】已解决:SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame
2826 1
|
安全 Go 开发者
Go语言map并发安全使用的正确姿势
在Go并发编程中,由于普通map不是线程安全的,多goroutine访问可能导致数据竞态。为保证安全,可使用`sync.Mutex`封装map或使用从Go 1.9开始提供的`sync.Map`。前者通过加锁手动同步,后者内置并发控制,适用于多goroutine共享。选择哪种取决于具体场景和性能需求。
428 0
|
前端开发 数据可视化 UED
【Web 前端】标签上title与alt属性有什么区别?
【4月更文挑战第22天】【Web 前端】标签上title与alt属性有什么区别?
|
开发者
同济大学系统结构 实验一:MIPS指令系统和MIPS体系结构-4
同济大学系统结构 实验一:MIPS指令系统和MIPS体系结构-4
748 0
同济大学系统结构 实验一:MIPS指令系统和MIPS体系结构-4
|
Shell Linux 网络安全
Termux安装Linux
Termux安装Linux
1137 2