SparkEnv在两个地方会被创建, 由于SparkEnv中包含了很多重要的模块, 比如BlockManager, 所以SparkEnv很重要
Driver端, 在SparkContext初始化的时候, SparkEnv会被创建
// Create the Spark execution environment (cache, map output tracker, etc)
// NOTE(review): assumes spark.driver.host / spark.driver.port are already set as system
// properties; System.getProperty returns null for an unset key and .toInt would then fail.
private[spark] val env = SparkEnv.createFromSystemProperties(
  "<driver>", // executor id slot: "<driver>" marks the driver; executors pass their own executorId
  System.getProperty("spark.driver.host"),
  System.getProperty("spark.driver.port").toInt,
  isDriver = true,
  isLocal = isLocal)
SparkEnv.set(env)
Executor端, 在executor初始化时被创建
// Initialize Spark environment (using system properties read above)
// NOTE(review): port 0 presumably lets the actor system bind any free port — confirm
// against AkkaUtils.createActorSystem.
val env = SparkEnv.createFromSystemProperties(
  executorId,
  slaveHostname,
  0,
  isDriver = false,
  isLocal = false)
SparkEnv.set(env)
SparkEnv Class
用于持有(hold)一个运行中的Spark实例的所有运行时环境对象, 包括serializer, Akka actor system, block manager, map output tracker等
/**
 * Container for every runtime environment object of a running Spark instance
 * (master or worker alike): the serializers, the Akka actor system, the block
 * manager, the map output tracker, and so on.
 *
 * Spark code currently locates its SparkEnv through a thread-local variable, which
 * means every thread touching these objects must have the right SparkEnv installed.
 * Use SparkEnv.get to read the current environment (for instance after a SparkContext
 * has been created) and SparkEnv.set to install one.
 */
class SparkEnv (
    val executorId: String,
    val actorSystem: ActorSystem,
    val serializerManager: SerializerManager,
    val serializer: Serializer,
    val closureSerializer: Serializer,
    val cacheManager: CacheManager,
    val mapOutputTracker: MapOutputTracker,
    val shuffleFetcher: ShuffleFetcher,
    val broadcastManager: BroadcastManager,
    val blockManager: BlockManager,
    val connectionManager: ConnectionManager,
    val httpFileServer: HttpFileServer,
    val sparkFilesDir: String,
    val metricsSystem: MetricsSystem) {
}
SparkEnv Object
Scala没有static成员, 所以用伴生对象(companion object)来提供类级别的接口
除了基本的get和set
就是在createFromSystemProperties中创建了一堆很关键的对象
/**
 * Companion object of SparkEnv: tracks the current SparkEnv (per thread, with a
 * global fallback) and builds new environments from system properties.
 */
object SparkEnv extends Logging {
  // Thread-local slot, so each thread reads and writes its own SparkEnv.
  private val env = new ThreadLocal[SparkEnv]

  // Caches the most recently set SparkEnv; volatile so other threads can pick it up.
  @volatile private var lastSetSparkEnv: SparkEnv = _

  /** Stores `e` both as the last-set environment and as the calling thread's environment. */
  def set(e: SparkEnv): Unit = {
    lastSetSparkEnv = e
    env.set(e)
  }

  /**
   * Returns the ThreadLocal SparkEnv, if non-null. Else returns the SparkEnv
   * previously set in any thread.
   */
  def get: SparkEnv = {
    // Falls back to lastSetSparkEnv when this thread has no local value.
    Option(env.get()).getOrElse(lastSetSparkEnv)
  }

  /**
   * Returns the ThreadLocal SparkEnv only, without the fallback used by `get`.
   */
  def getThreadLocal: SparkEnv = {
    env.get()
  }

  /**
   * Builds a SparkEnv, creating the actor system, serializers, block manager,
   * map output tracker, shuffle fetcher, HTTP file server and metrics system.
   *
   * @param executorId id stored in the resulting SparkEnv and handed to the BlockManager
   * @param hostname   host passed to AkkaUtils.createActorSystem
   * @param port       port passed to AkkaUtils.createActorSystem
   * @param isDriver   passed to BroadcastManager; also selects the "driver" or "executor"
   *                   metrics system
   * @param isLocal    passed to BlockManagerMasterActor
   * @return the newly assembled SparkEnv (the caller is responsible for SparkEnv.set)
   */
  def createFromSystemProperties(
      executorId: String,
      hostname: String,
      port: Int,
      isDriver: Boolean,
      isLocal: Boolean): SparkEnv = {

    val (actorSystem, boundPort) = AkkaUtils.createActorSystem("spark", hostname, port)

    val classLoader = Thread.currentThread.getContextClassLoader

    // Create an instance of the class named by the given Java system property, or by
    // defaultClassName if the property is not set, and return it as a T
    def instantiateClass[T](propertyName: String, defaultClassName: String): T = {
      val name = System.getProperty(propertyName, defaultClassName)
      Class.forName(name, true, classLoader).newInstance().asInstanceOf[T]
    }

    val serializerManager = new SerializerManager

    val serializer = serializerManager.setDefault(
      System.getProperty("spark.serializer", "org.apache.spark.serializer.JavaSerializer"))

    val closureSerializer = serializerManager.get(
      System.getProperty("spark.closure.serializer", "org.apache.spark.serializer.JavaSerializer"))

    // BlockManager
    // Must be created before connectionManager and cacheManager below, which read it:
    // a local val cannot be referenced ahead of its definition.
    // Per the article's note, registerOrLookup creates the actor object only on the master;
    // a slave just obtains a reference.
    // NOTE(review): registerOrLookup is not defined in this excerpt.
    val blockManagerMaster = new BlockManagerMaster(registerOrLookup(
      "BlockManagerMaster",
      new BlockManagerMasterActor(isLocal)))
    val blockManager = new BlockManager(executorId, actorSystem, blockManagerMaster, serializer)

    val connectionManager = blockManager.connectionManager

    val broadcastManager = new BroadcastManager(isDriver)

    val cacheManager = new CacheManager(blockManager)

    // MapOutputTracker
    // As with the BlockManagerMaster, the actor object itself is created only on the master.
    val mapOutputTracker = new MapOutputTracker()
    mapOutputTracker.trackerActor = registerOrLookup(
      "MapOutputTracker",
      new MapOutputTrackerActor(mapOutputTracker))

    // ShuffleFetcher
    val shuffleFetcher = instantiateClass[ShuffleFetcher](
      "spark.shuffle.fetcher", "org.apache.spark.BlockStoreShuffleFetcher")

    val httpFileServer = new HttpFileServer()
    httpFileServer.initialize()
    // Publish the file server's URI as a system property.
    System.setProperty("spark.fileserver.uri", httpFileServer.serverUri)

    val metricsSystem =
      MetricsSystem.createMetricsSystem(if (isDriver) "driver" else "executor")
    metricsSystem.start()

    // NOTE(review): sparkFilesDir is not defined in this excerpt.
    new SparkEnv(
      executorId,
      actorSystem,
      serializerManager,
      serializer,
      closureSerializer,
      cacheManager,
      mapOutputTracker,
      shuffleFetcher,
      broadcastManager,
      blockManager,
      connectionManager,
      httpFileServer,
      sparkFilesDir,
      metricsSystem)
  }
}
本文章摘自博客园,原文发布日期:2014-01-13