First, install the CentOS system and configure the network.
My three machines:
master 192.168.179.10
slave1 192.168.179.11
slave2 192.168.179.12
For the port numbers used by each component, see: 大数据hadoop常见端口 (common Hadoop ports) - GoAl
For detailed steps on configuring NAT-mode networking on CentOS 7, see: CentOS7配置NAT模式网络详细步骤(亲测版) (Jackson_mvp's CSDN blog)
1. Restart the network
service network restart    # or: systemctl restart network.service
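To confirm the interface came back up with the expected address (a minimal check based on the IPs listed above):
ip addr                     # the node's static IP (e.g. 192.168.179.10 on master) should appear on the NAT interface
ping -c 3 192.168.179.11    # from master, the other nodes should be reachable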
Permanently change the hostname
hostnamectl set-hostname master    # on each node, use its own name (master / slave1 / slave2)
vi /etc/hostname                   # or edit the file directly
Set up the shared directory /mnt/hdfs.
2. Edit the hosts file (configure this on every node)
vi /etc/hosts
192.168.179.10 master
192.168.179.11 slave1
192.168.179.12 slave2
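After saving /etc/hosts on every node, hostname resolution can be checked with a couple of pings (a minimal sanity check):
ping -c 2 slave1
ping -c 2 slave2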
3. System firewall and kernel firewall (SELinux) configuration (run on every node)
# Temporarily disable the kernel firewall (SELinux)
setenforce 0
# Permanently disable the kernel firewall (SELinux)
vi /etc/selinux/config
SELINUX=disabled
# Temporarily stop the system firewall
systemctl stop firewalld.service
# Permanently disable the system firewall
systemctl disable firewalld.service
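To verify that both are really off, the standard CentOS 7 tools can be used (a quick check):
getenforce                      # typically prints Permissive now, and Disabled after a reboot with the config change
systemctl is-active firewalld   # should print inactive
systemctl is-enabled firewalld  # should print disabled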
4. Passwordless SSH login
# Generate a key pair (run on every node)
ssh-keygen -t rsa
# Create authorized_keys from the master's public key (run on the master)
cat /root/.ssh/id_rsa.pub > /root/.ssh/authorized_keys
chmod 600 /root/.ssh/authorized_keys
# Append the other nodes' public keys (run on the master)
ssh slave1 cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys
ssh slave2 cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys
# Copy the combined authorized_keys back to the other nodes (run on the master)
scp /root/.ssh/authorized_keys root@slave1:/root/.ssh/authorized_keys
scp /root/.ssh/authorized_keys root@slave2:/root/.ssh/authorized_keys
Test:
ssh slave1
exit
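If passwordless login is set up correctly, each of the following should print the remote hostname without prompting for a password (a minimal loop over the three nodes defined above):
for h in master slave1 slave2; do ssh $h hostname; done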
First, look at the environment file of a completed installation (if nothing is installed yet these entries won't exist and you will need to write them yourself, changing the paths to your own directories).
vim /etc/profile
export JAVA_HOME=/usr/local/jdk1.8.0_144
export PATH=$PATH:$JAVA_HOME/bin
export HADOOP_HOME=/usr/local/hadoop/hadoop-2.8.1
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export SCALA_HOME=/usr/local/scala-2.11.6
export PATH=$PATH:$SCALA_HOME/bin
#export SPARK_HOME=/usr/local/spark-2.3.0-bin-hadoop2.7
#export PATH=$PATH:$SPARK_HOME/bin
export ZOOKEEPER_HOME=/usr/local/zookeeper-3.4.5
export PATH=$PATH:$ZOOKEEPER_HOME/bin
export HBASE_HOME=/usr/local/hbase-2.0.0-alpha3
export PATH=$PATH:$HBASE_HOME/bin
export HIVE_HOME=/usr/local/hive
export HIVE_CONF_DIR=${HIVE_HOME}/conf
export PATH=$HIVE_HOME/bin:$PATH
export FLUME_HOME=/usr/local/flume1.8
export PATH=$PATH:$FLUME_HOME/bin
# set sqoop
export SQOOP_HOME=/usr/local/sqoop1.4.7
export PATH=$SQOOP_HOME/bin:$PATH
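Once the corresponding packages are actually installed at the paths above, the variables can be checked roughly as follows (a sketch; adjust the paths to your own directories as noted above):
source /etc/profile
java -version
hadoop version
scala -version
hive --version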
JDK installation
# Check for and remove the pre-installed OpenJDK
rpm -qa | grep java
rpm -e --nodeps java-1.8.0-openjdk-1.8.0.161-2.b14.el7.x86_64
# Configure environment variables; append at the end of the file
vim ~/.bashrc
export JAVA_HOME=/usr/local/src/jdk1.8.0_172
export JRE_HOME=${JAVA_HOME}/jre
export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib
export PATH=${JAVA_HOME}/bin:$PATH
# Copy the environment variables to the other nodes
scp ~/.bashrc root@slave1:~/
scp ~/.bashrc root@slave2:~/
# Copy the JDK directory to the other nodes
scp -r /usr/local/src/jdk1.8.0_172 root@slave1:/usr/local/src/
scp -r /usr/local/src/jdk1.8.0_172 root@slave2:/usr/local/src/
# Reload the environment variables
source ~/.bashrc
# Verify the configuration
java -version
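Since the JDK directory and ~/.bashrc were also copied to the slaves, it is worth confirming Java works there too (a minimal check; ~/.bashrc is sourced explicitly because a non-interactive ssh session may not load it):
ssh slave1 "source ~/.bashrc && java -version"
ssh slave2 "source ~/.bashrc && java -version"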
Hadoop installation (prerequisite: JDK installed)
1. Unpack the archive, then go into the configuration directory (etc/hadoop for Hadoop 2.x) and edit the configuration files.
vim core-site.xml
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://master:9000</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>file:/usr/local/src/hadoop-2.6.5/tmp</value>
  </property>
</configuration>

vim hdfs-site.xml
<configuration>
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>master:9001</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/usr/local/src/hadoop-2.6.5/dfs/name</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/usr/local/src/hadoop-2.6.5/dfs/data</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
</configuration>

# Edit the configuration file: add the MapReduce settings
cp mapred-site.xml.template mapred-site.xml
vim mapred-site.xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>

# Edit the configuration file: add the resource manager settings
vim yarn-site.xml
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>master:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>master:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>master:8035</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address</name>
    <value>master:8033</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>master:8088</value>
  </property>
</configuration>

# Create the temporary and data directories
mkdir /usr/local/src/hadoop-2.6.5/tmp
mkdir -p /usr/local/src/hadoop-2.6.5/dfs/name
mkdir -p /usr/local/src/hadoop-2.6.5/dfs/data
# Configure environment variables; append at the end of the file
vim ~/.bashrc
export HADOOP_HOME=/usr/local/src/hadoop-2.6.5
export PATH=$PATH:$HADOOP_HOME/bin
# Copy the environment variables to the slave nodes
scp -r ~/.bashrc root@slave1:~/
scp -r ~/.bashrc root@slave2:~/
# Copy the Hadoop directory to the slave nodes
scp -r /usr/local/src/hadoop-2.6.5 root@slave1:/usr/local/src/
scp -r /usr/local/src/hadoop-2.6.5 root@slave2:/usr/local/src/
# Reload the environment variables
source ~/.bashrc
# Format the NameNode
hadoop namenode -format
A successful format prints a log line like:
common.Storage: Storage directory /usr/local/src/hadoop-2.6.5/dfs/name has been successfully formatted.
After starting the cluster, use the jps command on the master and slaves to check whether the processes are running.

Hive 1.2.2
# Unpack the Hive archive
cd /usr/local/src
tar zxvf apache-hive-1.2.2-bin.tar.gz
# Edit the configuration file: set the metastore connection details
cd apache-hive-1.2.2-bin/conf
vim hive-site.xml
<configuration>
  <property>
    <name>javax.jdo.option.ConnectionURL</name>
    <value>jdbc:mysql://master:3306/hive?createDatabaseIfNotExist=true</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionDriverName</name>
    <value>com.mysql.jdbc.Driver</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionUserName</name>
    <value>root</value>
  </property>
  <property>
    <name>javax.jdo.option.ConnectionPassword</name>
    <value>123456</value>
  </property>
</configuration>
# Add environment variables
vim ~/.bashrc
export HIVE_HOME=/usr/local/src/hive1.2.2
export PATH=$HIVE_HOME/bin:$PATH
# Reload the environment variables
source ~/.bashrc
# Set up the JDBC connector
cd /usr/local/src
tar zxvf mysql-connector-java-5.1.44.tar.gz
# Copy the JDBC driver jar into Hive's lib directory
cp mysql-connector-java-5.1.44/mysql-connector-java-5.1.44-bin.jar apache-hive-1.2.2-bin/lib
# Start the Hive CLI
hive
This fails with an error (a jline version conflict).
Solution:
Replace the jline jar under Hadoop's share/hadoop/yarn/lib with the matching jar shipped with Hive, i.e. swap hadoop-2.6.5/share/hadoop/yarn/lib/jline-0.9.94.jar for Hive's jline-2.12.jar. To locate the jars, run the following from the Hadoop root directory:
find ../ -name "jline*"
Search results:
../hadoop-2.6.5/share/hadoop/httpfs/tomcat/webapps/webhdfs/WEB-INF/lib/jline-0.9.94.jar
../hadoop-2.6.5/share/hadoop/yarn/lib/jline-0.9.94.jar
../hadoop-2.6.5/share/hadoop/kms/tomcat/webapps/kms/WEB-INF/lib/jline-0.9.94.jar
../scala-2.11.4/lib/jline-2.12.jar
../hive1.2.2/lib/jline-2.12.jar
../zookeeper-3.4.11/lib/jline-0.9.94.jar
../zookeeper-3.4.11/lib/jline-0.9.94.LICENSE.txt
../zookeeper-3.4.11/src/java/lib/jline-0.9.94.LICENSE.txt
Copy jline-2.12.jar from Hive's lib directory into Hadoop's share/hadoop/yarn/lib, replacing the old jar there (see the sketch after the reference below).
Reference: https://blog.csdn.net/wanghaiyuan1993/article/details/46272553
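Based on the paths found above, the replacement itself can be done roughly like this (a sketch; the old jar is kept as a backup rather than deleted):
cd /usr/local/src/hadoop-2.6.5/share/hadoop/yarn/lib
mv jline-0.9.94.jar jline-0.9.94.jar.bak     # keep the original jar as a backup
cp /usr/local/src/hive1.2.2/lib/jline-2.12.jar .
Afterwards, run hive again to confirm the error is gone.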
Zookeeper 3.4.11
# Unpack the Zookeeper archive
cd /usr/local/src
tar zxvf zookeeper-3.4.11.tar.gz
# Edit the configuration file: comment out line 12 and append the settings below
cd /usr/local/src/zookeeper-3.4.11/conf
cp zoo_sample.cfg zoo.cfg
vim zoo.cfg
dataDir=/usr/local/src/zookeeper-3.4.11/data
dataLogDir=/usr/local/src/zookeeper-3.4.11/log
server.1=master:2888:3888
server.2=slave1:2888:3888
server.3=slave2:2888:3888
# Create the data and log directories
mkdir /usr/local/src/zookeeper-3.4.11/data
mkdir /usr/local/src/zookeeper-3.4.11/log
# Configure environment variables
vim ~/.bashrc
export ZOOKEEPER_HOME=/usr/local/src/zookeeper-3.4.11
export PATH=$PATH:$ZOOKEEPER_HOME/bin
# Copy the environment variables to the other nodes
scp ~/.bashrc root@slave1:~/
scp ~/.bashrc root@slave2:~/
# Reload the environment variables
source ~/.bashrc
# Copy the Zookeeper directory to the slave nodes
scp -r /usr/local/src/zookeeper-3.4.11 root@slave1:/usr/local/src/
scp -r /usr/local/src/zookeeper-3.4.11 root@slave2:/usr/local/src/
# Assign an ID on each node; every node's ID must be unique and match the configuration file
# Master
echo "1" > /usr/local/src/zookeeper-3.4.11/data/myid
# Slave1
echo "2" > /usr/local/src/zookeeper-3.4.11/data/myid
# Slave2
echo "3" > /usr/local/src/zookeeper-3.4.11/data/myid
# Start the Zookeeper service (must be started on every node)
zkServer.sh start
zkServer.sh stop
zkServer.sh status
All nodes should show a QuorumPeerMain process.

HBase 0.9.8
# Unpack the HBase archive
cd /usr/local/src
tar zxvf hbase-1.3.1-bin.tar.gz
# Configure the region server hostnames
cd hbase0.9.8/conf
vim regionservers
master
slave1
slave2
# Set JAVA_HOME and disable HBase's bundled Zookeeper (lines 29 and 124 of hbase-env.sh)
vim hbase-env.sh
export JAVA_HOME=/usr/local/src/jdk1.8.0_172
export HBASE_MANAGES_ZK=false
# Configure the core HBase parameters
vim hbase-site.xml
<configuration>
  <property>
    <name>hbase.tmp.dir</name>
    <value>/usr/local/src/hbase0.9.8/tmp/hbase</value>
  </property>
  <property>
    <name>hbase.rootdir</name>
    <value>hdfs://master:9000/hbase</value>
  </property>
  <property>
    <name>hbase.cluster.distributed</name>
    <value>true</value>
  </property>
  <property>
    <name>hbase.zookeeper.quorum</name>
    <value>master,slave1,slave2</value>
  </property>
  <property>
    <name>hbase.zookeeper.property.dataDir</name>
    <value>/usr/local/src/zookeeper-3.4.11/tmp/zookeeper</value>
  </property>
  <property>
    <name>hbase.master.info.port</name>
    <value>60010</value>
  </property>
</configuration>
# Create the temporary directories
mkdir -p /usr/local/src/hbase0.9.8/tmp/hbase
mkdir -p /usr/local/src/zookeeper-3.4.11/tmp/zookeeper
# Copy the HBase directory to the other nodes
scp -r /usr/local/src/hbase0.9.8 root@slave1:/usr/local/src/
scp -r /usr/local/src/hbase0.9.8 root@slave2:/usr/local/src/
# Configure environment variables
vim ~/.bashrc
export HBASE_HOME=/usr/local/src/hbase0.9.8
export PATH=$PATH:$HBASE_HOME/bin
# Copy the environment variables to the other nodes
scp ~/.bashrc root@slave1:~/
scp ~/.bashrc root@slave2:~/
# Reload the environment variables
source ~/.bashrc

Startup commands for each component:
# Start HDFS and YARN
cd /usr/local/src/hadoop-2.6.5/sbin
./start-all.sh
# Start Zookeeper (run on every node)
zkServer.sh start
zkServer.sh status
# Start Spark (from any path); starting HBase does not require Spark
start-all.sh
# Start HBase
start-hbase.sh
# Shut down: HBase first, then Zookeeper, then HDFS/YARN
stop-hbase.sh
zkServer.sh stop
cd /usr/local/src/hadoop-2.6.5/sbin
./stop-all.sh
# Check with jps
Master: HMaster, HRegionServer
Slaves: HRegionServer

Flume 1.6
# Unpack the Flume archive
cd /usr/local/src
tar zxvf apache-flume-1.6.0-bin.tar.gz
# Edit the configuration file: set the Java environment
cd apache-flume-1.6.0-bin/conf
cp flume-env.sh.template flume-env.sh
vi flume-env.sh
export JAVA_HOME=/usr/local/src/jdk1.8.0_172
export JAVA_OPTS="-Xms100m -Xmx2000m -Dcom.sun.management.jmxremote"
# Create an agent configuration for the NetCat source
cd apache-flume-1.6.0-bin/conf
vim flume-netcat.conf
# Name the components on this agent
agent.sources = r1
agent.sinks = k1
agent.channels = c1
# Describe/configure the source
agent.sources.r1.type = netcat
agent.sources.r1.bind = 127.0.0.1
agent.sources.r1.port = 44444
# Describe the sink
agent.sinks.k1.type = logger
# Use a channel which buffers events in memory
agent.channels.c1.type = memory
agent.channels.c1.capacity = 1000
agent.channels.c1.transactionCapacity = 100
# Bind the source and sink to the channel
agent.sources.r1.channels = c1
agent.sinks.k1.channel = c1
# Verification
# Server: run from the flume-1.6.0 directory
flume-ng agent --conf conf --conf-file conf/flume-netcat.conf -name=agent -Dflume.root.logger=INFO,console
# Client: on Windows
telnet master 44444

kafka_2.11-0.10.2.1
The following installation guides can be consulted:
1. https://blog.csdn.net/qq_43605654/article/details/90786063?depth_1-utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromBaidu-1&utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromBaidu-1
2. https://blog.csdn.net/CarolRain/article/details/78376642?depth_1-utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromBaidu-6&utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromBaidu-6
# Unpack the Kafka archive
cd /usr/local/src
tar zxvf kafka_2.11-0.10.2.1.tgz
# Point zookeeper.connect at every host in the cluster that runs Zookeeper (line 119)
cd kafka_2.11-0.10.2.1/config
vim server.properties
zookeeper.connect=master:2181,slave1:2181,slave2:2181
# Add environment variables
vim ~/.bashrc
export KAFKA_HOME=/usr/local/src/kafka_2.11-0.10.2.1
export PATH=$KAFKA_HOME/bin:$PATH
# Copy the Kafka directory to the slave nodes
scp -r /usr/local/src/kafka_2.11-0.10.2.1 root@slave1:/usr/local/src/
scp -r /usr/local/src/kafka_2.11-0.10.2.1 root@slave2:/usr/local/src/
# Copy the environment variables to the other nodes
scp ~/.bashrc root@slave1:~/
scp ~/.bashrc root@slave2:~/
# Reload the environment variables
source ~/.bashrc
# Set the Kafka broker id; every broker id in the cluster must be unique
# Master
vim /usr/local/src/kafka_2.11-0.10.2.1/config/server.properties
broker.id=0
# Slave1
broker.id=1
# Slave2
broker.id=2
# Create a startup script for Kafka's bundled Zookeeper (skip this step if the Zookeeper cluster is already running)
vim /usr/local/src/kafka_2.11-0.10.2.1/bin/start-kafka-zookeeper.sh
/usr/local/src/kafka_2.11-0.10.2.1/bin/zookeeper-server-start.sh /usr/local/src/kafka_2.11-0.10.2.1/config/zookeeper.properties
# Grant execute permission
chmod +x /usr/local/src/kafka_2.11-0.10.2.1/bin/start-kafka-zookeeper.sh
# Start Kafka's bundled Zookeeper cluster (must be started on every node)
start-kafka-zookeeper.sh
# Create a Kafka startup script
vim /usr/local/src/kafka_2.11-0.10.2.1/bin/start-kafka.sh
/usr/local/src/kafka_2.11-0.10.2.1/bin/kafka-server-start.sh -daemon /usr/local/src/kafka_2.11-0.10.2.1/config/server.properties
# Grant execute permission
chmod +x /usr/local/src/kafka_2.11-0.10.2.1/bin/start-kafka.sh
# Copy the Kafka startup script to the slave nodes
scp -r /usr/local/src/kafka_2.11-0.10.2.1/bin/start-kafka.sh root@slave1:/usr/local/src/kafka_2.11-0.10.2.1/bin/
scp -r /usr/local/src/kafka_2.11-0.10.2.1/bin/start-kafka.sh root@slave2:/usr/local/src/kafka_2.11-0.10.2.1/bin/
# Start the Kafka cluster (must be started on every node)
start-kafka.sh
# Or start it in the foreground:
bin/kafka-server-start.sh config/server.properties

Spark 1.6.3 installation
# Unpack the Spark and Scala archives
cd /usr/local/src
tar zxvf spark-2.0.2-bin-hadoop2.6.tgz
tar zxvf scala-2.11.8.tgz
# Edit the configuration file: set the environment variables
cd spark-2.0.2-bin-hadoop2.6/conf
cp spark-env.sh.template spark-env.sh
vim spark-env.sh
export SCALA_HOME=/usr/local/src/scala-2.11.4
export JAVA_HOME=/usr/local/src/jdk1.8.0_172
export HADOOP_HOME=/usr/local/src/hadoop-2.6.5
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
SPARK_MASTER_IP=master
SPARK_LOCAL_DIRS=/usr/local/src/spark1.6.3
SPARK_DRIVER_MEMORY=1G
# Edit the configuration file: add the slave hostnames
cp slaves.template slaves
vim slaves
slave1
slave2
# Configure environment variables; append at the end of the file
vim ~/.bashrc
export SCALA_HOME=/usr/local/src/scala
export PATH=$PATH:$SCALA_HOME/bin
export SPARK_HOME=/usr/local/src/spark1.6.3
export PATH=$PATH:$SPARK_HOME/bin:$SPARK_HOME/sbin
# Copy the environment variables to the other nodes
scp -r ~/.bashrc root@slave1:~/
scp -r ~/.bashrc root@slave2:~/
# Copy the Scala directory to the slave nodes
scp -r /usr/local/src/scala-2.11.4 root@slave1:/usr/local/src/
scp -r /usr/local/src/scala-2.11.4 root@slave2:/usr/local/src/
# Copy the Spark directory to the slave nodes
scp -r /usr/local/src/spark1.6.3 root@slave1:/usr/local/src/
scp -r /usr/local/src/spark1.6.3 root@slave2:/usr/local/src/
# Reload the environment variables
source ~/.bashrc
# Start the cluster
start-all.sh
start-master.sh
start-slaves.sh
# Shut down
stop-all.sh
stop-master.sh
stop-slaves.sh
Web UI check:
http://master:8080/
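A quick way to confirm the Spark installation works end to end (a sketch, assuming $SPARK_HOME/bin is on PATH as configured above and the standalone master uses the default port 7077):
run-example SparkPi 10                      # runs the bundled Pi example locally
spark-shell --master spark://master:7077    # connects to the standalone master; the shell should appear under Running Applications at http://master:8080/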