Linear Regression Algorithm (Spark MLlib)
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.mllib.linalg.Vectors;
import org.apache.spark.mllib.regression.LabeledPoint;
import org.apache.spark.mllib.regression.LinearRegressionModel;
import org.apache.spark.mllib.regression.LinearRegressionWithSGD;
import scala.Tuple2;

public class xxhg {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("als").setMaster("local");
        JavaSparkContext sc = new JavaSparkContext(conf);

        // Each input line is "label,feature1 feature2 ..." (comma-separated label, space-separated features)
        String path = "file:///home/gyq/下载/spark-2.3.2-bin-hadoop2.7/data/mllib/ridge-data/jj.data";
        JavaRDD<String> data = sc.textFile(path);
        JavaRDD<LabeledPoint> parseData = data.map(f -> {
            String[] parts = f.split(",");
            String[] features = parts[1].split(" ");
            double[] v = new double[features.length];
            // Note: the upper bound length-1 skips the last token, leaving the final slot of v at 0.0
            for (int i = 0; i < features.length - 1; i++) {
                v[i] = Double.valueOf(features[i]);
            }
            return new LabeledPoint(Double.parseDouble(parts[0]), Vectors.dense(v));
        });
        parseData.cache();

        // Train a linear regression model with mini-batch SGD
        int numIters = 100;
        double stepSize = 0.00000001;
        LinearRegressionModel model = LinearRegressionWithSGD.train(parseData.rdd(), numIters, stepSize);

        // Pair each prediction with its true label and print the pairs
        JavaPairRDD<Double, Double> values = parseData.mapToPair(f ->
                new Tuple2<>(model.predict(f.features()), f.label()));
        values.foreach(f -> System.out.println(f));

        // Training mean squared error; .mean() returns the average
        double MSE = values.mapToDouble(f -> {
            double diff = f._1() - f._2();
            return diff * diff;
        }).mean();
        System.out.println("training Mean Squared Error=" + MSE);
    }
}
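The file jj.data itself is not reproduced in this post; judging from the parser above it follows the same "label,feature1 feature2 ..." layout as the ridge-data/lpsa.data sample shipped with Spark. The following is a minimal, Spark-free sketch of that parsing, assuming a hypothetical sample line (the class name ParseSketch and the numbers in the sample line are for illustration only):

import java.util.Arrays;

// Spark-free sketch of the line format the map() above expects.
// The sample line is hypothetical (jj.data is not shown in the post).
public class ParseSketch {
    public static void main(String[] args) {
        String line = "-0.4307829,-1.637356 -2.006212 -1.862426"; // hypothetical sample line
        String[] parts = line.split(",");
        double label = Double.parseDouble(parts[0]);
        String[] features = parts[1].split(" ");
        double[] v = new double[features.length];
        for (int i = 0; i < features.length; i++) { // parses every token (the program above stops at length - 1)
            v[i] = Double.parseDouble(features[i]);
        }
        System.out.println("label=" + label + ", features=" + Arrays.toString(v));
    }
}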
Java HotSpot(TM) 64-Bit Server VM warning: ignoring option PermSize=256m; support was removed in 8.0
Java HotSpot(TM) 64-Bit Server VM warning: ignoring option MaxPermSize=768m; support was removed in 8.0
2021-12-30 13:07:01 WARN Utils:66 - Your hostname, gyq-virtual-machine resolves to a loopback address: 127.0.1.1; using 192.168.159.130 instead (on interface ens33)
2021-12-30 13:07:01 WARN Utils:66 - Set SPARK_LOCAL_IP if you need to bind to another address
2021-12-30 13:07:02 INFO SparkContext:54 - Running Spark version 2.3.2
2021-12-30 13:07:04 WARN NativeCodeLoader:62 - Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2021-12-30 13:07:05 INFO SparkContext:54 - Submitted application: als
2021-12-30 13:07:05 INFO SecurityManager:54 - Changing view acls to: gyq
2021-12-30 13:07:05 INFO SecurityManager:54 - Changing modify acls to: gyq
2021-12-30 13:07:05 INFO SecurityManager:54 - Changing view acls groups to:
2021-12-30 13:07:05 INFO SecurityManager:54 - Changing modify acls groups to:
2021-12-30 13:07:05 INFO SecurityManager:54 - SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(gyq); groups with view permissions: Set(); users with modify permissions: Set(gyq); groups with modify permissions: Set()
2021-12-30 13:07:07 INFO Utils:54 - Successfully started service 'sparkDriver' on port 44105.
2021-12-30 13:07:08 INFO SparkEnv:54 - Registering MapOutputTracker
2021-12-30 13:07:08 INFO SparkEnv:54 - Registering BlockManagerMaster
2021-12-30 13:07:08 INFO BlockManagerMasterEndpoint:54 - Using org.apache.spark.storage.DefaultTopologyMapper for getting topology information
2021-12-30 13:07:08 INFO BlockManagerMasterEndpoint:54 - BlockManagerMasterEndpoint up
2021-12-30 13:07:08 INFO DiskBlockManager:54 - Created local directory at /tmp/blockmgr-109263cd-d100-439c-b707-8c434a3f5ee4
2021-12-30 13:07:08 INFO MemoryStore:54 - MemoryStore started with capacity 161.4 MB
2021-12-30 13:07:08 INFO SparkEnv:54 - Registering OutputCommitCoordinator
2021-12-30 13:07:09 INFO log:192 - Logging initialized @14054ms
2021-12-30 13:07:09 INFO Server:351 - jetty-9.3.z-SNAPSHOT, build timestamp: unknown, git hash: unknown
2021-12-30 13:07:10 INFO Server:419 - Started @14619ms
2021-12-30 13:07:10 WARN Utils:66 - Service 'SparkUI' could not bind on port 4040. Attempting port 4041.
2021-12-30 13:07:10 WARN Utils:66 - Service 'SparkUI' could not bind on port 4041. Attempting port 4042.
2021-12-30 13:07:10 INFO AbstractConnector:278 - Started ServerConnector@4eee1714{HTTP/1.1,[http/1.1]}{0.0.0.0:4042}
2021-12-30 13:07:10 INFO Utils:54 - Successfully started service 'SparkUI' on port 4042.
2021-12-30 13:07:10 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@4ef27d66{/jobs,null,AVAILABLE,@Spark}
2021-12-30 13:07:10 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@591e58fa{/jobs/json,null,AVAILABLE,@Spark}
2021-12-30 13:07:10 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@3954d008{/jobs/job,null,AVAILABLE,@Spark}
2021-12-30 13:07:10 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@593e824f{/jobs/job/json,null,AVAILABLE,@Spark}
2021-12-30 13:07:10 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@72ccd81a{/stages,null,AVAILABLE,@Spark}
2021-12-30 13:07:10 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@6d8792db{/stages/json,null,AVAILABLE,@Spark}
2021-12-30 13:07:10 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@64bc21ac{/stages/stage,null,AVAILABLE,@Spark}
2021-12-30 13:07:10 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@ce5a68e{/stages/stage/json,null,AVAILABLE,@Spark}
2021-12-30 13:07:10 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@9d157ff{/stages/pool,null,AVAILABLE,@Spark}
2021-12-30 13:07:10 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@2f162cc0{/stages/pool/json,null,AVAILABLE,@Spark}
2021-12-30 13:07:10 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@5df417a7{/storage,null,AVAILABLE,@Spark}
2021-12-30 13:07:10 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@7c041b41{/storage/json,null,AVAILABLE,@Spark}
2021-12-30 13:07:10 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@7f69d591{/storage/rdd,null,AVAILABLE,@Spark}
2021-12-30 13:07:10 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@61078690{/storage/rdd/json,null,AVAILABLE,@Spark}
2021-12-30 13:07:10 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@1cb3ec38{/environment,null,AVAILABLE,@Spark}
2021-12-30 13:07:10 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@403132fc{/environment/json,null,AVAILABLE,@Spark}
2021-12-30 13:07:10 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@71c5b236{/executors,null,AVAILABLE,@Spark}
2021-12-30 13:07:10 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@2cab9998{/executors/json,null,AVAILABLE,@Spark}
2021-12-30 13:07:10 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@2f7a7219{/executors/threadDump,null,AVAILABLE,@Spark}
2021-12-30 13:07:10 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@669513d8{/executors/threadDump/json,null,AVAILABLE,@Spark}
2021-12-30 13:07:10 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@3a1d593e{/static,null,AVAILABLE,@Spark}
2021-12-30 13:07:10 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@32232e55{/,null,AVAILABLE,@Spark}
2021-12-30 13:07:10 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@5217f3d0{/api,null,AVAILABLE,@Spark}
2021-12-30 13:07:10 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@6fa590ba{/jobs/job/kill,null,AVAILABLE,@Spark}
2021-12-30 13:07:10 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@6e9319f{/stages/stage/kill,null,AVAILABLE,@Spark}
2021-12-30 13:07:10 INFO SparkUI:54 - Bound SparkUI to 0.0.0.0, and started at http://192.168.159.130:4042
2021-12-30 13:07:11 INFO Executor:54 - Starting executor ID driver on host localhost
2021-12-30 13:07:11 INFO Utils:54 - Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 33217.
2021-12-30 13:07:11 INFO NettyBlockTransferService:54 - Server created on 192.168.159.130:33217
2021-12-30 13:07:11 INFO BlockManager:54 - Using org.apache.spark.storage.RandomBlockReplicationPolicy for block replication policy
2021-12-30 13:07:12 INFO BlockManagerMaster:54 - Registering BlockManager BlockManagerId(driver, 192.168.159.130, 33217, None)
2021-12-30 13:07:12 INFO BlockManagerMasterEndpoint:54 - Registering block manager 192.168.159.130:33217 with 161.4 MB RAM, BlockManagerId(driver, 192.168.159.130, 33217, None)
2021-12-30 13:07:12 INFO BlockManagerMaster:54 - Registered BlockManager BlockManagerId(driver, 192.168.159.130, 33217, None)
2021-12-30 13:07:12 INFO BlockManager:54 - Initialized BlockManager: BlockManagerId(driver, 192.168.159.130, 33217, None)
2021-12-30 13:07:13 INFO ContextHandler:781 - Started o.s.j.s.ServletContextHandler@288a4658{/metrics/json,null,AVAILABLE,@Spark}
2021-12-30 13:07:23 INFO MemoryStore:54 - Block broadcast_0 stored as values in memory (estimated size 236.7 KB, free 161.2 MB)
2021-12-30 13:07:23 INFO MemoryStore:54 - Block broadcast_0_piece0 stored as bytes in memory (estimated size 22.9 KB, free 161.1 MB)
2021-12-30 13:07:23 INFO BlockManagerInfo:54 - Added broadcast_0_piece0 in memory on 192.168.159.130:33217 (size: 22.9 KB, free: 161.4 MB)
2021-12-30 13:07:24 INFO SparkContext:54 - Created broadcast 0 from textFile at xxhg.java:20
2021-12-30 13:07:26 INFO FileInputFormat:249 - Total input paths to process : 1
2021-12-30 13:07:26 INFO SparkContext:54 - Starting job: first at GeneralizedLinearAlgorithm.scala:204
2021-12-30 13:07:27 INFO DAGScheduler:54 - Got job 0 (first at GeneralizedLinearAlgorithm.scala:204) with 1 output partitions
2021-12-30 13:07:27 INFO DAGScheduler:54 - Final stage: ResultStage 0 (first at GeneralizedLinearAlgorithm.scala:204)
2021-12-30 13:07:27 INFO DAGScheduler:54 - Parents of final stage: List()
2021-12-30 13:07:27 INFO DAGScheduler:54 - Missing parents: List()
2021-12-30 13:07:27 INFO DAGScheduler:54 - Submitting ResultStage 0 (MapPartitionsRDD[3] at map at GeneralizedLinearAlgorithm.scala:204), which has no missing parents
2021-12-30 13:07:27 INFO MemoryStore:54 - Block broadcast_1 stored as values in memory (estimated size 4.8 KB, free 161.1 MB)
2021-12-30 13:07:27 INFO MemoryStore:54 - Block broadcast_1_piece0 stored as bytes in memory (estimated size 2.8 KB, free 161.1 MB)
2021-12-30 13:07:27 INFO BlockManagerInfo:54 - Added broadcast_1_piece0 in memory on 192.168.159.130:33217 (size: 2.8 KB, free: 161.4 MB)
2021-12-30 13:07:27 INFO SparkContext:54 - Created broadcast 1 from broadcast at DAGScheduler.scala:1039
2021-12-30 13:07:27 INFO DAGScheduler:54 - Submitting 1 missing tasks from ResultStage 0 (MapPartitionsRDD[3] at map at GeneralizedLinearAlgorithm.scala:204) (first 15 tasks are for partitions Vector(0))
2021-12-30 13:07:27 INFO TaskSchedulerImpl:54 - Adding task set 0.0 with 1 tasks
2021-12-30 13:07:28 INFO TaskSetManager:54 - Starting task 0.0 in stage 0.0 (TID 0, localhost, executor driver, partition 0, PROCESS_LOCAL, 7924 bytes)
2021-12-30 13:07:28 INFO Executor:54 - Running task 0.0 in stage 0.0 (TID 0)
2021-12-30 13:07:28 INFO HadoopRDD:54 - Input split: file:/home/gyq/下载/spark-2.3.2-bin-hadoop2.7/data/mllib/ridge-data/jj.data:0+634
2021-12-30 13:07:28 INFO MemoryStore:54 - Block rdd_2_0 stored as values in memory (estimated size 512.0 B, free 161.1 MB)
2021-12-30 13:07:28 INFO BlockManagerInfo:54 - Added rdd_2_0 in memory on 192.168.159.130:33217 (size: 512.0 B, free: 161.4 MB)
2021-12-30 13:07:29 INFO Executor:54 - 1 block locks were not released by TID = 0: [rdd_2_0]
2021-12-30 13:07:29 INFO Executor:54 - Finished task 0.0 in stage 0.0 (TID 0). 781 bytes result sent to driver
2021-12-30 13:07:29 INFO TaskSetManager:54 - Finished task 0.0 in stage 0.0 (TID 0) in 1338 ms on localhost (executor driver) (1/1)
2021-12-30 13:07:29 INFO TaskSchedulerImpl:54 - Removed TaskSet 0.0, whose tasks have all completed, from pool
2021-12-30 13:07:29 INFO DAGScheduler:54 - ResultStage 0 (first at GeneralizedLinearAlgorithm.scala:204) finished in 1.830 s
2021-12-30 13:07:29 INFO DAGScheduler:54 - Job 0 finished: first at GeneralizedLinearAlgorithm.scala:204, took 2.391373 s
2021-12-30 13:07:29 INFO SparkContext:54 - Starting job: count at GradientDescent.scala:209
2021-12-30 13:07:29 INFO DAGScheduler:54 - Got job 1 (count at GradientDescent.scala:209) with 1 output partitions
2021-12-30 13:07:29 INFO DAGScheduler:54 - Final stage: ResultStage 1 (count at GradientDescent.scala:209)
2021-12-30 13:07:29 INFO DAGScheduler:54 - Parents of final stage: List()
2021-12-30 13:07:29 INFO DAGScheduler:54 - Missing parents: List()
2021-12-30 13:07:29 INFO DAGScheduler:54 - Submitting ResultStage 1 (MapPartitionsRDD[4] at map at GeneralizedLinearAlgorithm.scala:297), which has no missing parents
2021-12-30 13:07:29 INFO MemoryStore:54 - Block broadcast_2 stored as values in memory (estimated size 4.5 KB, free 161.1 MB)
2021-12-30 13:07:29 INFO MemoryStore:54 - Block broadcast_2_piece0 stored as bytes in memory (estimated size 2.7 KB, free 161.1 MB)
2021-12-30 13:07:29 INFO BlockManagerInfo:54 - Added broadcast_2_piece0 in memory on 192.168.159.130:33217 (size: 2.7 KB, free: 161.4 MB)
2021-12-30 13:07:29 INFO SparkContext:54 - Created broadcast 2 from broadcast at DAGScheduler.scala:1039
2021-12-30 13:07:29 INFO DAGScheduler:54 - Submitting 1 missing tasks from ResultStage 1 (MapPartitionsRDD[4] at map at GeneralizedLinearAlgorithm.scala:297) (first 15 tasks are for partitions Vector(0))
2021-12-30 13:07:29 INFO TaskSchedulerImpl:54 - Adding task set 1.0 with 1 tasks
2021-12-30 13:07:29 INFO TaskSetManager:54 - Starting task 0.0 in stage 1.0 (TID 1, localhost, executor driver, partition 0, PROCESS_LOCAL, 7924 bytes)
2021-12-30 13:07:29 INFO Executor:54 - Running task 0.0 in stage 1.0 (TID 1)
2021-12-30 13:07:29 INFO BlockManager:54 - Found block rdd_2_0 locally
2021-12-30 13:07:29 INFO Executor:54 - Finished task 0.0 in stage 1.0 (TID 1). 875 bytes result sent to driver
2021-12-30 13:07:29 INFO TaskSetManager:54 - Finished task 0.0 in stage 1.0 (TID 1) in 49 ms on localhost (executor driver) (1/1)
2021-12-30 13:07:29 INFO DAGScheduler:54 - ResultStage 1 (count at GradientDescent.scala:209) finished in 0.104 s
2021-12-30 13:07:29 INFO DAGScheduler:54 - Job 1 finished: count at GradientDescent.scala:209, took 0.113471 s
2021-12-30 13:07:29 INFO TaskSchedulerImpl:54 - Removed TaskSet 1.0, whose tasks have all completed, from pool
2021-12-30 13:07:33 INFO MemoryStore:54 - Block broadcast_3 stored as values in memory (estimated size 120.0 B, free 161.1 MB)
2021-12-30 13:07:33 INFO MemoryStore:54 - Block broadcast_3_piece0 stored as bytes in memory (estimated size 153.0 B, free 161.1 MB)
2021-12-30 13:07:33 INFO BlockManagerInfo:54 - Added broadcast_3_piece0 in memory on 192.168.159.130:33217 (size: 153.0 B, free: 161.4 MB)
2021-12-30 13:07:33 INFO SparkContext:54 - Created broadcast 3 from broadcast at GradientDescent.scala:235
2021-12-30 13:07:34 INFO SparkContext:54 - Starting job: treeAggregate at GradientDescent.scala:239
2021-12-30 13:07:34 INFO DAGScheduler:54 - Got job 2 (treeAggregate at GradientDescent.scala:239) with 1 output partitions
2021-12-30 13:07:34 INFO DAGScheduler:54 - Final stage: ResultStage 2 (treeAggregate at GradientDescent.scala:239)
2021-12-30 13:07:34 INFO DAGScheduler:54 - Parents of final stage: List()
2021-12-30 13:07:34 INFO DAGScheduler:54 - Missing parents: List()
2021-12-30 13:07:34 INFO DAGScheduler:54 - Submitting ResultStage 2 (MapPartitionsRDD[6] at treeAggregate at GradientDescent.scala:239), which has no missing parents
2021-12-30 13:07:34 INFO MemoryStore:54 - Block broadcast_4 stored as values in memory (estimated size 6.9 KB, free 161.1 MB)
2021-12-30 13:07:34 INFO MemoryStore:54 - Block broadcast_4_piece0 stored as bytes in memory (estimated size 3.8 KB, free 161.1 MB)
2021-12-30 13:07:34 INFO BlockManagerInfo:54 - Added broadcast_4_piece0 in memory on 192.168.159.130:33217 (size: 3.8 KB, free: 161.4 MB)
2021-12-30 13:07:34 INFO SparkContext:54 - Created broadcast 4 from broadcast at DAGScheduler.scala:1039
2021-12-30 13:07:34 INFO DAGScheduler:54 - Submitting 1 missing tasks from ResultStage 2 (MapPartitionsRDD[6] at treeAggregate at GradientDescent.scala:239) (first 15 tasks are for partitions Vector(0))
2021-12-30 13:07:34 INFO TaskSchedulerImpl:54 - Adding task set 2.0 with 1 tasks
2021-12-30 13:07:34 INFO TaskSetManager:54 - Starting task 0.0 in stage 2.0 (TID 2, localhost, executor driver, partition 0, PROCESS_LOCAL, 8033 bytes)
2021-12-30 13:07:34 INFO Executor:54 - Running task 0.0 in stage 2.0 (TID 2)
2021-12-30 13:07:34 INFO BlockManager:54 - Found block rdd_2_0 locally
2021-12-30 13:07:34 WARN BLAS:61 - Failed to load implementation from: com.github.fommil.netlib.NativeSystemBLAS
2021-12-30 13:07:34 WARN BLAS:61 - Failed to load implementation from: com.github.fommil.netlib.NativeRefBLAS
2021-12-30 13:07:34 INFO Executor:54 - Finished task 0.0 in stage 2.0 (TID 2). 1158 bytes result sent to driver
2021-12-30 13:07:34 INFO TaskSetManager:54 - Finished task 0.0 in stage 2.0 (TID 2) in 131 ms on localhost (executor driver) (1/1)
2021-12-30 13:07:34 INFO TaskSchedulerImpl:54 - Removed TaskSet 2.0, whose tasks have all completed, from pool
2021-12-30 13:07:34 INFO DAGScheduler:54 - ResultStage 2 (treeAggregate at GradientDescent.scala:239) finished in 0.284 s
2021-12-30 13:07:34 INFO DAGScheduler:54 - Job 2 finished: treeAggregate at GradientDescent.scala:239, took 0.292528 s
2021-12-30 13:07:34 INFO TorrentBroadcast:54 - Destroying Broadcast(3) (from destroy at GradientDescent.scala:249)
2021-12-30 13:07:34 INFO MemoryStore:54 - Block broadcast_5 stored as values in memory (estimated size 120.0 B, free 161.1 MB)
2021-12-30 13:07:34 INFO MemoryStore:54 - Block broadcast_5_piece0 stored as bytes in memory (estimated size 205.0 B, free 161.1 MB)
2021-12-30 13:07:34 INFO BlockManagerInfo:54 - Added broadcast_5_piece0 in memory on 192.168.159.130:33217 (size: 205.0 B, free: 161.4 MB)
2021-12-30 13:07:34 INFO SparkContext:54 - Created broadcast 5 from broadcast at GradientDescent.scala:235
2021-12-30 13:07:34 INFO BlockManagerInfo:54 - Removed broadcast_3_piece0 on 192.168.159.130:33217 in memory (size: 153.0 B, free: 161.4 MB)
2021-12-30 13:07:35 INFO SparkContext:54 - Starting job: treeAggregate at GradientDescent.scala:239
2021-12-30 13:07:35 INFO DAGScheduler:54 - Got job 3 (treeAggregate at GradientDescent.scala:239) with 1 output partitions
2021-12-30 13:07:35 INFO DAGScheduler:54 - Final stage: ResultStage 3 (treeAggregate at GradientDescent.scala:239)
2021-12-30 13:07:35 INFO DAGScheduler:54 - Parents of final stage: List()
2021-12-30 13:07:35 INFO DAGScheduler:54 - Missing parents: List()
2021-12-30 13:07:35 INFO DAGScheduler:54 - Submitting ResultStage 3 (MapPartitionsRDD[8] at treeAggregate at GradientDescent.scala:239), which has no missing parents
2021-12-30 13:07:35 INFO MemoryStore:54 - Block broadcast_6 stored as values in memory (estimated size 6.9 KB, free 161.1 MB)
2021-12-30 13:07:35 INFO MemoryStore:54 - Block broadcast_6_piece0 stored as bytes in memory (estimated size 3.8 KB, free 161.1 MB)
2021-12-30 13:07:35 INFO BlockManagerInfo:54 - Added broadcast_6_piece0 in memory on 192.168.159.130:33217 (size: 3.8 KB, free: 161.4 MB)
2021-12-30 13:07:35 INFO SparkContext:54 - Created broadcast 6 from broadcast at DAGScheduler.scala:1039
2021-12-30 13:07:35 INFO DAGScheduler:54 - Submitting 1 missing tasks from ResultStage 3 (MapPartitionsRDD[8] at treeAggregate at GradientDescent.scala:239) (first 15 tasks are for partitions Vector(0))
2021-12-30 13:07:35 INFO TaskSchedulerImpl:54 - Adding task set 3.0 with 1 tasks
2021-12-30 13:07:35 INFO TaskSetManager:54 - Starting task 0.0 in stage 3.0 (TID 3, localhost, executor driver, partition 0, PROCESS_LOCAL, 8033 bytes)
2021-12-30 13:07:35 INFO Executor:54 - Running task 0.0 in stage 3.0 (TID 3)
2021-12-30 13:07:35 INFO BlockManager:54 - Found block rdd_2_0 locally
2021-12-30 13:07:35 INFO Executor:54 - Finished task 0.0 in stage 3.0 (TID 3). 1201 bytes result sent to driver
2021-12-30 13:07:35 INFO TaskSetManager:54 - Finished task 0.0 in stage 3.0 (TID 3) in 39 ms on localhost (executor driver) (1/1)
2021-12-30 13:07:35 INFO TaskSchedulerImpl:54 - Removed TaskSet 3.0, whose tasks have all completed, from pool
2021-12-30 13:07:35 INFO DAGScheduler:54 - ResultStage 3 (treeAggregate at GradientDescent.scala:239) finished in 0.115 s
2021-12-30 13:07:35 INFO DAGScheduler:54 - Job 3 finished: treeAggregate at GradientDescent.scala:239, took 0.141668 s
2021-12-30 13:07:35 INFO TorrentBroadcast:54 - Destroying Broadcast(5) (from destroy at GradientDescent.scala:249)
2021-12-30 13:07:35 INFO BlockManagerInfo:54 - Removed broadcast_5_piece0 on 192.168.159.130:33217 in memory (size: 205.0 B, free: 161.4 MB)
2021-12-30 13:07:35 INFO GradientDescent:54 - GradientDescent.runMiniBatchSGD finished. Last 10 stochastic losses 0.033101385688005004, 0.03310138080116999
2021-12-30 13:07:35 INFO SparkContext:54 - Starting job: foreach at xxhg.java:40
2021-12-30 13:07:35 INFO DAGScheduler:54 - Got job 4 (foreach at xxhg.java:40) with 1 output partitions
2021-12-30 13:07:35 INFO DAGScheduler:54 - Final stage: ResultStage 4 (foreach at xxhg.java:40)
2021-12-30 13:07:35 INFO DAGScheduler:54 - Parents of final stage: List()
2021-12-30 13:07:35 INFO DAGScheduler:54 - Missing parents: List()
2021-12-30 13:07:35 INFO DAGScheduler:54 - Submitting ResultStage 4 (MapPartitionsRDD[9] at mapToPair at xxhg.java:38), which has no missing parents
2021-12-30 13:07:35 INFO MemoryStore:54 - Block broadcast_7 stored as values in memory (estimated size 5.6 KB, free 161.1 MB)
2021-12-30 13:07:35 INFO MemoryStore:54 - Block broadcast_7_piece0 stored as bytes in memory (estimated size 3.1 KB, free 161.1 MB)
2021-12-30 13:07:35 INFO BlockManagerInfo:54 - Added broadcast_7_piece0 in memory on 192.168.159.130:33217 (size: 3.1 KB, free: 161.4 MB)
2021-12-30 13:07:35 INFO SparkContext:54 - Created broadcast 7 from broadcast at DAGScheduler.scala:1039
2021-12-30 13:07:35 INFO DAGScheduler:54 - Submitting 1 missing tasks from ResultStage 4 (MapPartitionsRDD[9] at mapToPair at xxhg.java:38) (first 15 tasks are for partitions Vector(0))
2021-12-30 13:07:35 INFO TaskSchedulerImpl:54 - Adding task set 4.0 with 1 tasks
2021-12-30 13:07:35 INFO TaskSetManager:54 - Starting task 0.0 in stage 4.0 (TID 4, localhost, executor driver, partition 0, PROCESS_LOCAL, 7924 bytes)
2021-12-30 13:07:35 INFO Executor:54 - Running task 0.0 in stage 4.0 (TID 4)
2021-12-30 13:07:35 INFO BlockManager:54 - Found block rdd_2_0 locally
(-4.203303429003143E-8,-0.4307829)
(-3.2601926081988176E-8,-0.1625189)
(-2.6924660473041324E-8,-0.1625189)
(-3.438424206034633E-8,-0.1625189)
2021-12-30 13:07:35 INFO Executor:54 - Finished task 0.0 in stage 4.0 (TID 4). 837 bytes result sent to driver
2021-12-30 13:07:35 INFO TaskSetManager:54 - Finished task 0.0 in stage 4.0 (TID 4) in 260 ms on localhost (executor driver) (1/1)
2021-12-30 13:07:35 INFO TaskSchedulerImpl:54 - Removed TaskSet 4.0, whose tasks have all completed, from pool
2021-12-30 13:07:35 INFO DAGScheduler:54 - ResultStage 4 (foreach at xxhg.java:40) finished in 0.316 s
2021-12-30 13:07:35 INFO DAGScheduler:54 - Job 4 finished: foreach at xxhg.java:40, took 0.327307 s
2021-12-30 13:07:36 INFO SparkContext:54 - Starting job: mean at xxhg.java:46
2021-12-30 13:07:36 INFO DAGScheduler:54 - Got job 5 (mean at xxhg.java:46) with 1 output partitions
2021-12-30 13:07:36 INFO DAGScheduler:54 - Final stage: ResultStage 5 (mean at xxhg.java:46)
2021-12-30 13:07:36 INFO DAGScheduler:54 - Parents of final stage: List()
2021-12-30 13:07:36 INFO DAGScheduler:54 - Missing parents: List()
2021-12-30 13:07:36 INFO DAGScheduler:54 - Submitting ResultStage 5 (MapPartitionsRDD[12] at mean at xxhg.java:46), which has no missing parents
2021-12-30 13:07:36 INFO MemoryStore:54 - Block broadcast_8 stored as values in memory (estimated size 6.2 KB, free 161.1 MB)
2021-12-30 13:07:36 INFO MemoryStore:54 - Block broadcast_8_piece0 stored as bytes in memory (estimated size 3.4 KB, free 161.1 MB)
2021-12-30 13:07:36 INFO BlockManagerInfo:54 - Added broadcast_8_piece0 in memory on 192.168.159.130:33217 (size: 3.4 KB, free: 161.4 MB)
2021-12-30 13:07:36 INFO SparkContext:54 - Created broadcast 8 from broadcast at DAGScheduler.scala:1039
2021-12-30 13:07:36 INFO DAGScheduler:54 - Submitting 1 missing tasks from ResultStage 5 (MapPartitionsRDD[12] at mean at xxhg.java:46) (first 15 tasks are for partitions Vector(0))
2021-12-30 13:07:36 INFO TaskSchedulerImpl:54 - Adding task set 5.0 with 1 tasks
2021-12-30 13:07:36 INFO TaskSetManager:54 - Starting task 0.0 in stage 5.0 (TID 5, localhost, executor driver, partition 0, PROCESS_LOCAL, 7924 bytes)
2021-12-30 13:07:36 INFO Executor:54 - Running task 0.0 in stage 5.0 (TID 5)
2021-12-30 13:07:36 INFO BlockManager:54 - Found block rdd_2_0 locally
2021-12-30 13:07:36 INFO Executor:54 - Finished task 0.0 in stage 5.0 (TID 5). 959 bytes result sent to driver
2021-12-30 13:07:36 INFO TaskSetManager:54 - Finished task 0.0 in stage 5.0 (TID 5) in 57 ms on localhost (executor driver) (1/1)
2021-12-30 13:07:36 INFO TaskSchedulerImpl:54 - Removed TaskSet 5.0, whose tasks have all completed, from pool
2021-12-30 13:07:36 INFO DAGScheduler:54 - ResultStage 5 (mean at xxhg.java:46) finished in 0.151 s
2021-12-30 13:07:36 INFO DAGScheduler:54 - Job 5 finished: mean at xxhg.java:46, took 0.163834 s
training Mean Squared Error=0.0662027546913127
2021-12-30 13:07:36 INFO SparkContext:54 - Invoking stop() from shutdown hook
2021-12-30 13:07:36 INFO AbstractConnector:318 - Stopped Spark@4eee1714{HTTP/1.1,[http/1.1]}{0.0.0.0:4042}
2021-12-30 13:07:36 INFO SparkUI:54 - Stopped Spark web UI at http://192.168.159.130:4042
2021-12-30 13:07:36 INFO MapOutputTrackerMasterEndpoint:54 - MapOutputTrackerMasterEndpoint stopped!
2021-12-30 13:07:37 INFO MemoryStore:54 - MemoryStore cleared
2021-12-30 13:07:37 INFO BlockManager:54 - BlockManager stopped
2021-12-30 13:07:37 INFO BlockManagerMaster:54 - BlockManagerMaster stopped
2021-12-30 13:07:37 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint:54 - OutputCommitCoordinator stopped!
2021-12-30 13:07:37 INFO SparkContext:54 - Successfully stopped SparkContext
2021-12-30 13:07:37 INFO ShutdownHookManager:54 - Shutdown hook called
2021-12-30 13:07:37 INFO ShutdownHookManager:54 - Deleting directory /tmp/spark-f89c641f-23cd-46fd-9ec3-e54f2e9f8b12
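Because foreach printed only four (prediction, label) pairs, the reported training MSE of 0.0662027546913127 can be sanity-checked by hand: every prediction is on the order of 1e-8, so each squared error is essentially the square of the label. Below is a small stand-alone check; the class name MseCheck is just for illustration, and the pairs are copied from the output above.

public class MseCheck {
    public static void main(String[] args) {
        // (prediction, label) pairs copied from the foreach output above
        double[][] pairs = {
            {-4.203303429003143E-8, -0.4307829},
            {-3.2601926081988176E-8, -0.1625189},
            {-2.6924660473041324E-8, -0.1625189},
            {-3.438424206034633E-8, -0.1625189}
        };
        double sum = 0.0;
        for (double[] p : pairs) {
            double diff = p[0] - p[1];
            sum += diff * diff;
        }
        // prints roughly 0.066202754..., matching the "training Mean Squared Error" reported above
        System.out.println("MSE = " + sum / pairs.length);
    }
}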