mv /home/abc /home/abcd (rename the directory)
(1)vi /usr/local/hadoop/etc/hadoop/core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://skinglzw:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/usr/local/hadoop/tmp</value>
</property>
<property>
<name>hadoop.native.lib</name>
<value>true</value>
<description>Should native hadoop libraries, if present, be used?</description>
</property>
</configuration>
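After editing core-site.xml, a quick way to confirm the setting is picked up (assuming the Hadoop bin directory is on the PATH) is to ask Hadoop for the configured value:
hdfs getconf -confKey fs.defaultFS
This should print hdfs://skinglzw:9000.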
(2)vi hdfs-site.xml
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>skinglzw:50090</value>
<description>The secondary namenode http server address and port.</description>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/usr/local/hadoop/dfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/usr/local/hadoop/dfs/data</value>
</property>
<property>
<name>dfs.namenode.checkpoint.dir</name>
<value>file:///usr/local/hadoop/dfs/namesecondary</value>
<description>Determines where on the local filesystem the DFS secondary name node should store the temporary images to merge.</description>
</property>
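The name, data and checkpoint directories configured above have to exist (or be creatable) on the respective nodes. A minimal sketch, assuming the same /usr/local/hadoop layout on every machine:
mkdir -p /usr/local/hadoop/dfs/name
mkdir -p /usr/local/hadoop/dfs/data
mkdir -p /usr/local/hadoop/dfs/namesecondary
mkdir -p /usr/local/hadoop/tmp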
(3)cp ./mapred-site.xml.template ./mapred-site.xml
vi mapred-site.xml
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
(4)vi yarn-site.xml
<property>
<name>yarn.resourcemanager.hostname</name>
<value>skinglzw</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
(5)vi hadoop-env.sh
export JAVA_HOME=/usr/local/jdk/jdk1.8.0
export HADOOP_HOME=/usr/local/hadoop
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"
(5.1) vi slaves
skinglzw1
skinglzw2
skinglzw3
skinglzw4
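start-dfs.sh and start-yarn.sh log in to every host listed in slaves over SSH. The original notes do not show the SSH setup; if passwordless SSH is not already configured, one common approach (an assumption, not part of the original steps) is:
ssh-keygen -t rsa
for h in skinglzw1 skinglzw2 skinglzw3 skinglzw4; do ssh-copy-id $h; done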
(6)vi ~/.bashrc
export JAVA_HOME=/usr/local/jdk/jdk1.8.0
export JRE_HOME=${JAVA_HOME}/jre
export HADOOP_HOME=/usr/local/hadoop
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export HADOOP_COMMON_LIB_NATIVE_DIR=${HADOOP_HOME}/lib/native
export HADOOP_OPTS="-Djava.library.path=${HADOOP_HOME}/lib"
export SCALA_HOME=/usr/local/scala/scala-2.10.4
export SPARK_HOME=/usr/local/spark/spark-1.6.0-bin-hadoop2.6
export FLINK_HOME=/usr/local/flink/flink-0.9.0
export HIVE_HOME=/usr/local/hive/apache-hive-1.2.1
export M2_HOME=/usr/local/spark/apache-maven-3.2.2/
export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib:${HIVE_HOME}/lib
export PATH=/usr/local/eclipse/eclipse:${M2_HOME}/bin:${SPARK_HOME}/bin:${SPARK_HOME}/sbin:${SCALA_HOME}/bin:${JAVA_HOME}/bin:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin:${HIVE_HOME}/bin:${FLINK_HOME}/bin:$PATH
source ~/.bashrc (run this on every machine for the changes to take effect)
scp /home/skinglzw/.bashrc root@skinglzw1:/home/skinglzw1/.bashrc
scp /home/skinglzw/.bashrc root@skinglzw2:/home/skinglzw2/.bashrc
scp /home/skinglzw/.bashrc root@skinglzw3:/home/skinglzw3/.bashrc
scp /home/skinglzw/.bashrc root@skinglzw4:/home/skinglzw4/.bashrc
vi /etc/hosts
192.168.23.131 skinglzw
192.168.23.128 skinglzw1
192.168.23.129 skinglzw2
192.168.23.130 skinglzw3
192.168.23.132 skinglzw4
scp /etc/hosts root@skinglzw1:/etc/hosts
scp /etc/hosts root@skinglzw2:/etc/hosts
scp /etc/hosts root@skinglzw3:/etc/hosts
scp /etc/hosts root@skinglzw4:/etc/hosts
scp -r /usr/local/jdk/ root@skinglzw4:/usr/local (use -r to copy an entire directory)
scp -r /usr/local/scala/ root@skinglzw4:/usr/local
scp -r /usr/local/spark/ root@skinglzw4:/usr/local
scp -r /usr/local/hive/ root@skinglzw4:/usr/local
scp -r /usr/local/hadoop/ root@skinglzw4:/usr/local
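The scp -r commands above only show skinglzw4; the same directories have to reach every worker. A minimal sketch of the repeated copy, assuming root access on all workers:
for h in skinglzw1 skinglzw2 skinglzw3 skinglzw4; do
  for d in jdk scala spark hive hadoop; do
    scp -r /usr/local/$d/ root@$h:/usr/local
  done
done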
Format the HDFS filesystem:
bin/hdfs namenode -format
/usr/local/hadoop/sbin
./start-dfs.sh
./stop-dfs.sh
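After start-dfs.sh, it is worth checking that the daemons are actually up: on the master, jps should show NameNode and SecondaryNameNode, and on the workers it should show DataNode. The cluster view can also be confirmed with:
jps
hdfs dfsadmin -report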
YARN resource management and scheduling:
./start-yarn.sh (starting YARN is much faster than starting HDFS, because HDFS safemode performs data checks and recovery)
http://skinglzw:50070
http://skinglzw:8088
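Once HDFS and YARN are both running, a small test job confirms that mapreduce.framework.name=yarn works end to end. This uses the examples jar bundled with Hadoop (path assumed for a stock install under /usr/local/hadoop):
hadoop jar /usr/local/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar pi 2 10
The finished application should then appear in the web UI at http://skinglzw:8088.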
Configure Spark:
cp ./spark-env.sh.template ./spark-env.sh
vi spark-env.sh
export JAVA_HOME=/usr/local/jdk/jdk1.8.0
export SCALA_HOME=/usr/local/scala/scala-2.10.4
export HADOOP_HOME=/usr/local/hadoop
export HADOOP_CONF_DIR=/usr/local/hadoop/etc/hadoop
export SPARK_MASTER_IP=skinglzw
export SPARK_WORKER_MEMORY=1g
export SPARK_EXECUTOR_MEMORY=1g
export SPARK_DRIVER_MEMORY=1g
export SPARK_WORKER_CORES=8
vi slaves
skinglzw1
skinglzw2
skinglzw3
skinglzw4
vi spark-defaults.conf
spark.executor.extraJavaOptions -XX:+PrintGCDetails -Dkey=value -Dnumbers="one two three"
spark.eventLog.enabled true
spark.eventLog.dir hdfs://skinglzw:9000/historyserverforSpark
spark.yarn.historyServer.address skinglzw:18080
spark.history.fs.logDirectory hdfs://skinglzw:9000/historyserverforSpark
scp ./spark-env.sh root@skinglzw1:/usr/local/spark/spark-1.6.0-bin-hadoop2.6/conf/spark-env.sh
scp ./slaves root@skinglzw1:/usr/local/spark/spark-1.6.0-bin-hadoop2.6/conf/slaves
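The two scp commands above only show skinglzw1; the same files should reach every worker. A sketch of the loop, assuming the same Spark path on all machines:
for h in skinglzw1 skinglzw2 skinglzw3 skinglzw4; do
  scp ./spark-env.sh ./slaves root@$h:/usr/local/spark/spark-1.6.0-bin-hadoop2.6/conf/
done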
hdfs dfs -rm -r /historyserverforSpark (delete the old history data, if it exists)
hdfs dfs -mkdir /historyserverforSpark
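To confirm the event-log directory exists before starting the history server:
hdfs dfs -ls /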
/usr/local/spark/spark-1.6.0-bin-hadoop2.6/sbin
ulimit -c unlimited (my VM is single-core with 2 threads, so this has to be set first, otherwise it errors out and fails to start)
The NodeManager did not start; not sure why.
./start-all.sh (use jps on the other machines to check whether the Worker processes are running; if not, run stop-all.sh and then start-all.sh again; if it still fails, retrying a few times eventually works, probably because my VMs are low-spec)
./start-history-server.sh
http://skinglzw:18080
In the bin directory:
spark-submit (submit a Spark application)
spark-shell
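A quick way to exercise spark-submit against the standalone master configured above is the bundled SparkPi example (jar name as shipped in spark-1.6.0-bin-hadoop2.6; adjust the path if it differs):
spark-submit --class org.apache.spark.examples.SparkPi --master spark://skinglzw:7077 /usr/local/spark/spark-1.6.0-bin-hadoop2.6/lib/spark-examples-1.6.0-hadoop2.6.0.jar 10
The completed application should then show up in the history server at http://skinglzw:18080.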
This article is reposted from the skinglzw 51CTO blog. Original link: http://blog.51cto.com/skinglzw/1867055. Please contact the original author for reprint permission.