I. Environment Requirements
Reference: DataSphere Studio & Linkis single-machine one-click deployment documentation
Related packages: Baidu Netdisk
1. System Requirements
CentOS 6 or 7
2. Command Dependencies
Required commands:
telnet; tar; sed; dos2unix; mysql; yum; java; unzip; zip; expect
yum provides */telnet   # if a command is missing, use this to find the package that provides it (telnet shown as an example), then install that package
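A minimal sketch (not part of the original guide) for checking all of the required commands at once before proceeding:

# Print any required command that is not yet installed
for cmd in telnet tar sed dos2unix mysql yum java unzip zip expect; do
  command -v "$cmd" >/dev/null 2>&1 || echo "missing: $cmd"
done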
3. JDK
tar xf jdk-8u201-linux-x64.tar.gz -C /usr/local/
cd /usr/local/
ln -sv jdk1.8.0_201/ java
vim /etc/profile.d/java.sh
export JAVA_HOME=/usr/local/java
export CLASSPATH=$JAVA_HOME/lib/tools.jar
export PATH=$JAVA_HOME/bin:$PATH
. /etc/profile.d/java.sh
java -version
4. Raise the User File Descriptor Limits
vim /etc/security/limits.conf
*    soft    nofile     65536
*    hard    nofile     131072
*    soft    nproc      2048
*    hard    nproc      4096
*    soft    memlock    unlimited
*    hard    memlock    unlimited
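The new limits only take effect in new sessions. A quick check after logging in again (sketch, using the values above):

ulimit -n    # expect 65536 (soft nofile)
ulimit -u    # expect 2048 (soft nproc)
ulimit -l    # expect unlimited (soft memlock)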
5. Passwordless SSH to the Local Host
ssh-keygen -t rsa
ssh-copy-id -i 192.168.1.134
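A quick check that key-based login works (sketch; it should print the hostname without asking for a password):

ssh 192.168.1.134 hostname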
6. Install Python
# The exact version must be compatible with your Spark version; other versions are available at https://repo.anaconda.com/archive/
wget https://repo.anaconda.com/archive/Anaconda2-4.2.0-Linux-x86_64.sh
chmod +x Anaconda2-4.2.0-Linux-x86_64.sh
./Anaconda2-4.2.0-Linux-x86_64.sh   # install directory: /appcom/Install/anaconda2/
7. Hostname Resolution
Add the following entry to /etc/hosts:
192.168.1.134 datasphere-server
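After editing /etc/hosts, confirm the name resolves (sketch):

getent hosts datasphere-server
ping -c 1 datasphere-server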
8. Create the Deployment User
useradd hadoop
visudo
hadoop ALL=(ALL) NOPASSWD: ALL
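To confirm the sudoers entry took effect (sketch):

sudo -l -U hadoop    # should list: (ALL) NOPASSWD: ALL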
II. Install Dependent Services
1. Nginx
yum install nginx -y   # the installer adds the Nginx configuration and restarts Nginx automatically; a custom-built Nginx has not been verified
vim /etc/nginx/nginx.conf   # change the Nginx user to the deployment user, or move the front-end files to a directory the nginx user can read after installation
systemctl start nginx
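Quick checks that Nginx is installed and serving (sketch):

nginx -t                     # configuration syntax check
curl -I http://127.0.0.1/    # expect an HTTP response from the default server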
2. MySQL
Installation reference: https://www.cnblogs.com/panwenbin-logs/p/11425652.html
Make sure the MySQL user has write permission on the tmpdir directory:
show variables like '%tmpdir%';
Create the dss and hive databases:
create database dss default character set utf8mb4 COLLATE utf8mb4_general_ci;
CREATE USER 'dss'@'%' IDENTIFIED BY 'Qwer@123';
GRANT ALL PRIVILEGES ON dss.* TO 'dss'@'%';
create database hive default character set utf8mb4 COLLATE utf8mb4_general_ci;
CREATE USER 'hive'@'%' IDENTIFIED BY 'Qwer@123';
GRANT ALL PRIVILEGES ON hive.* TO 'hive'@'%';
FLUSH PRIVILEGES;
exit;
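Before moving on, it helps to confirm both accounts can connect from the deployment host (sketch, using the credentials created above):

mysql -h192.168.1.134 -P3306 -udss -p'Qwer@123' -e "show databases;"
mysql -h192.168.1.134 -P3306 -uhive -p'Qwer@123' -e "show databases;"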
3. Hadoop
mkdir -p /mnt/datasphere
tar xf hadoop-2.7.2.tar.gz -C /mnt/datasphere
cd /mnt/datasphere
ln -sv hadoop-2.7.2/ hadoop
cd hadoop/etc/hadoop/

cp hadoop-env.sh hadoop-env.sh-bak
vim hadoop-env.sh
export HDFS_NAMENODE_USER=hadoop
export HDFS_DATANODE_USER=hadoop
export HDFS_SECONDARYNAMENODE_USER=hadoop
export YARN_RESOURCEMANAGER_USER=hadoop
export YARN_NODEMANAGER_USER=hadoop
export HADOOP_HOME=/mnt/datasphere/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export JAVA_HOME=/usr/local/java

cp core-site.xml core-site.xml-bak
vim core-site.xml
<configuration>
    <!-- RPC address of the NameNode -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://192.168.1.134:9000</value>
    </property>
    <!-- Directory for files generated while Hadoop is running -->
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/Hadoop/tmp</value>
    </property>
    <!-- Static user for the HDFS web UI -->
    <property>
        <name>hadoop.http.staticuser.user</name>
        <value>hadoop</value>
    </property>
</configuration>

cp hdfs-site.xml hdfs-site.xml-bak
vim hdfs-site.xml
<configuration>
    <!-- HTTP address of the NameNode -->
    <property>
        <name>dfs.namenode.http-address</name>
        <value>192.168.1.134:50070</value>
    </property>
    <!-- NameNode storage path -->
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/Hadoop/name</value>
    </property>
    <!-- DataNode storage path -->
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/Hadoop/data</value>
    </property>
</configuration>

cp mapred-site.xml.template mapred-site.xml
vim mapred-site.xml
<configuration>
    <!-- Tell MapReduce to run on YARN -->
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>

vim yarn-site.xml
<configuration>
    <!-- Site specific YARN configuration properties -->
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>192.168.1.134</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <!-- Check physical memory used by each task and kill tasks that exceed their allocation; default is true -->
    <property>
        <name>yarn.nodemanager.pmem-check-enabled</name>
        <value>false</value>
    </property>
    <!-- Check virtual memory used by each task and kill tasks that exceed their allocation; default is true -->
    <property>
        <name>yarn.nodemanager.vmem-check-enabled</name>
        <value>false</value>
    </property>
</configuration>

vim slaves
192.168.1.134

mkdir -pv /Hadoop/{data,name,tmp}

vim /etc/profile.d/hadoop.sh
export HADOOP_HOME=/mnt/datasphere/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin

chown hadoop:hadoop -R /Hadoop
chown hadoop:hadoop -R /mnt
su - hadoop
. /etc/profile.d/hadoop.sh
hdfs namenode -format
start-all.sh
jps

Web UIs: http://192.168.1.134:8088 (YARN) and http://192.168.1.134:50070 (HDFS NameNode)
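A few smoke tests once the daemons are up (sketch; /tmp/linkis is the HDFS path that Linkis is configured to use later):

hdfs dfsadmin -report          # DataNode registered and reporting capacity
hdfs dfs -mkdir -p /tmp/linkis
hdfs dfs -ls /
yarn node -list                # NodeManager registered with the ResourceManager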
4. Hive
sudo vim /etc/profile.d/hive.sh
export HIVE_HOME=/mnt/datasphere/hive
export PATH=$JAVA_HOME/bin:$PATH:$HADOOP_HOME/bin:$HIVE_HOME/bin
. /etc/profile.d/hive.sh

tar xf apache-hive-2.3.3-bin.tar.gz -C /mnt/datasphere
cd /mnt/datasphere
ln -sv apache-hive-2.3.3-bin/ hive
cd hive/conf

cp hive-env.sh.template hive-env.sh
vim hive-env.sh
export JAVA_HOME=/usr/local/java
export HADOOP_HOME=/mnt/datasphere/hadoop

vim hive-site.xml
<configuration>
    <property>
        <name>javax.jdo.option.ConnectionURL</name>
        <value>jdbc:mysql://192.168.1.134:3306/hive?useSSL=false</value>
    </property>
    <property>
        <name>javax.jdo.option.ConnectionDriverName</name>
        <value>com.mysql.jdbc.Driver</value>
    </property>
    <!-- MySQL user -->
    <property>
        <name>javax.jdo.option.ConnectionUserName</name>
        <value>hive</value>
    </property>
    <!-- MySQL password -->
    <property>
        <name>javax.jdo.option.ConnectionPassword</name>
        <value>Qwer@123</value>
    </property>
    <property>
        <name>datanucleus.schema.autoCreateAll</name>
        <value>true</value>
    </property>
</configuration>

mv mysql-connector-java-5.1.27-bin.jar /mnt/datasphere/hive/lib/
schematool -initSchema -dbType mysql
hive -e "show databases"
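An optional end-to-end check that the metastore can create and drop objects, not just list databases (sketch; test_db and t1 are throwaway names):

hive -e 'create database if not exists test_db; create table if not exists test_db.t1(id int); show tables in test_db; drop table test_db.t1; drop database test_db;'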
5. Spark (on YARN)
# Other versions: http://archive.apache.org/dist/spark/
tar xf spark-2.4.3-bin-hadoop2.7.tgz -C /mnt/datasphere
cd /mnt/datasphere
ln -sv spark-2.4.3-bin-hadoop2.7/ spark
cd spark/conf/

cp spark-env.sh.template spark-env.sh
vim spark-env.sh
export JAVA_HOME=/usr/local/java
export HADOOP_CONF_DIR=/mnt/datasphere/hadoop/etc/hadoop/
export YARN_CONF_DIR=/mnt/datasphere/hadoop/etc/hadoop/
export SPARK_DIST_CLASSPATH=$(/mnt/datasphere/hadoop/bin/hadoop classpath)

sudo vim /root/.bash_profile
export PATH=/mnt/datasphere/spark/bin:$PATH
. /root/.bash_profile

cd ..
spark-submit --class org.apache.spark.examples.SparkPi \
  --master yarn --deploy-mode cluster \
  --driver-memory 4g --executor-memory 2g --executor-cores 1 \
  --queue default examples/jars/spark-examples*.jar 10
spark-sql -e "show databases"

Check the execution result in the YARN console: http://datasphere-server:8088
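Since config.sh later sets PYTHON_VERSION=python2, it may be worth confirming Spark picks up the Anaconda 2 interpreter installed in step I.6 (a sketch; the interpreter path is the install directory assumed earlier):

/appcom/Install/anaconda2/bin/python --version    # expect Python 2.7.x
export PYSPARK_PYTHON=/appcom/Install/anaconda2/bin/python
pyspark --master local[1]    # in the shell: sc.parallelize(range(10)).count() should return 10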
III. Install DataSphere Studio & Linkis
mv dss_linkis_one-click_install_20221201.zip /mnt/datasphere
cd /mnt/datasphere
unzip -q dss_linkis_one-click_install_20221201.zip
cd dss_linkis_one-click_install_20221201/
cd conf/

cp config.sh config.sh-bak
vim config.sh
deployUser=hadoop
LINKIS_VERSION=1.1.1
DSS_NGINX_IP=192.168.1.134
DSS_WEB_PORT=8085
DSS_VERSION=1.1.1
WORKSPACE_USER_ROOT_PATH=file:///tmp/linkis/
HDFS_USER_ROOT_PATH=hdfs:///tmp/linkis
RESULT_SET_ROOT_PATH=hdfs:///tmp/linkis
ENGINECONN_ROOT_PATH=/appcom/tmp
HADOOP_CONF_DIR=/mnt/datasphere/hadoop/etc/hadoop/   # adjust the Hadoop, Spark, and Hive conf directories to your environment
HIVE_CONF_DIR=/mnt/datasphere/hive/conf
SPARK_CONF_DIR=/mnt/datasphere/spark/conf/
LINKIS_PUBLIC_MODULE=lib/linkis-commons/public-module
YARN_RESTFUL_URL=http://datasphere-server:8088
SPARK_VERSION=2.4.3
HIVE_VERSION=2.3.3
PYTHON_VERSION=python2
LINKIS_EUREKA_INSTALL_IP=127.0.0.1
LINKIS_EUREKA_PORT=9600
LINKIS_GATEWAY_PORT=9001
LINKIS_MANAGER_PORT=9101
LINKIS_ENGINECONNMANAGER_PORT=9102
LINKIS_ENGINECONN_PLUGIN_SERVER_PORT=9103
LINKIS_ENTRANCE_PORT=9104
LINKIS_PUBLICSERVICE_PORT=9105
LINKIS_CS_PORT=9108
export SERVER_HEAP_SIZE="512M"
EMAIL_HOST=smtp.163.com
EMAIL_PORT=25
EMAIL_USERNAME=xxx@163.com
EMAIL_PASSWORD=xxxxx
EMAIL_PROTOCOL=smtp
ORCHESTRATOR_FILE_PATH=/appcom/tmp/dss
EXECUTION_LOG_PATH=/appcom/tmp/dss

cp db.sh db.sh-bak
vim db.sh
### for DSS-Server and Eventchecker APPCONN
MYSQL_HOST=192.168.1.134
MYSQL_PORT=3306
MYSQL_DB=dss
MYSQL_USER=dss
MYSQL_PASSWORD=Qwer@123
# Used together with Scriptis; if not set, Linkis falls back to the config files in $HIVE_CONF_DIR
HIVE_META_URL=jdbc:mysql://192.168.1.134:3306/hive?useSSL=false
HIVE_META_USER=hive
HIVE_META_PASSWORD=Qwer@123

sudo mkdir /appcom
sudo chown -R hadoop:hadoop /appcom
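Before running install.sh, a quick sanity check of the values just configured can save a failed install (a sketch, reusing the paths and credentials from config.sh and db.sh above):

mysql -h192.168.1.134 -P3306 -udss -p'Qwer@123' -e 'select 1;'    # DSS metadata database reachable
ls /mnt/datasphere/hadoop/etc/hadoop/core-site.xml \
   /mnt/datasphere/hive/conf/hive-site.xml \
   /mnt/datasphere/spark/conf/spark-env.sh                        # conf directories exist
hdfs dfs -mkdir -p /tmp/linkis && hdfs dfs -ls /tmp/linkis        # HDFS workspace/result-set path writable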
Run the installation script
cd ../bin
sh install.sh   # during installation the script asks whether to initialize the database and import metadata; both Linkis and DSS ask. On a first install you must answer yes; afterwards choose as needed
For a first install, choose option "2: Dangerous! Clear all data and rebuild the tables" when prompted.
Modify linkis-ps-publicservice.properties; otherwise no tables will show up when the Hive database list is refreshed.
cd ../linkis/conf/
vim linkis-ps-publicservice.properties
hive.meta.url=jdbc:mysql://192.168.1.134:3306/hive?useSSL=false
hive.meta.user=hive
hive.meta.password=Qwer@123
linkis.metadata.hive.permission.with-login-user-enabled=false
hadoop.config.dir=/mnt/datasphere/hadoop/etc/hadoop/
Copy the missing jars
cp /mnt/datasphere/hive/lib/datanucleus-* /mnt/datasphere/dss_linkis_one-click_install_20221201/linkis/lib/linkis-engineconn-plugins/hive/dist/v2.3.3/lib/
cp /mnt/datasphere/hive/lib/*jdo* /mnt/datasphere/dss_linkis_one-click_install_20221201/linkis/lib/linkis-engineconn-plugins/hive/dist/v2.3.3/lib/
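To confirm the jars actually landed in the Hive engine plugin's lib directory (sketch):

ls /mnt/datasphere/dss_linkis_one-click_install_20221201/linkis/lib/linkis-engineconn-plugins/hive/dist/v2.3.3/lib/ | grep -E 'datanucleus|jdo'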
Configure environment variables
cd /mnt/datasphere/dss_linkis_one-click_install_20221201/linkis/conf
Add the following to every .properties file in this directory:
hadoop.config.dir=/mnt/datasphere/hadoop/etc/hadoop/
hive.config.dir=/mnt/datasphere/hive/conf/
spark.config.dir=/mnt/datasphere/spark/conf/
Add the following to every .sh file in this directory:
HADOOP_CONF_DIR=/mnt/datasphere/hadoop/etc/hadoop
export HADOOP_CONF_DIR=/mnt/datasphere/hadoop/etc/hadoop
HIVE_CONF_DIR=/mnt/datasphere/hive/conf/
export HIVE_CONF_DIR=/mnt/datasphere/hive/conf/
SPARK_CONF_DIR=/mnt/datasphere/spark/conf/
export SPARK_CONF_DIR=/mnt/datasphere/spark/conf/
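If there are many files, the manual edits above can be scripted; a hedged sketch using the same directories configured earlier:

cd /mnt/datasphere/dss_linkis_one-click_install_20221201/linkis/conf
# Append the config-dir properties to every .properties file
for f in *.properties; do
  printf '%s\n' \
    'hadoop.config.dir=/mnt/datasphere/hadoop/etc/hadoop/' \
    'hive.config.dir=/mnt/datasphere/hive/conf/' \
    'spark.config.dir=/mnt/datasphere/spark/conf/' >> "$f"
done
# Append the conf-dir exports to every .sh file
for f in *.sh; do
  printf '%s\n' \
    'export HADOOP_CONF_DIR=/mnt/datasphere/hadoop/etc/hadoop' \
    'export HIVE_CONF_DIR=/mnt/datasphere/hive/conf/' \
    'export SPARK_CONF_DIR=/mnt/datasphere/spark/conf/' >> "$f"
done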
Start the services
cd /mnt/datasphere/dss_linkis_one-click_install_20221201/bin
sh start-all.sh
http://192.168.1.134:9600/   # Eureka console; check that the microservices are registered and healthy
http://192.168.1.134:8085/   # login page, user: hadoop, password: hadoop
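A quick reachability check from the shell before opening the browser (sketch):

curl -sI http://192.168.1.134:9600/ | head -1    # Eureka console responds
curl -sI http://192.168.1.134:8085/ | head -1    # DSS login page responds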
Install the default AppConns
cd /mnt/datasphere/dss_linkis_one-click_install_20221201/dss/bin
sh install-default-appconn.sh
Modify the queue configuration
yarn rmadmin -refreshQueues   # reload the queue configuration; this allows all users to use all queues, but permissions can also be set per queue
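After refreshing, the queue state and ACLs can be inspected for the current user (sketch; the mapred queue subcommands ship with Hadoop 2.x):

mapred queue -list        # lists queues with state and capacity
mapred queue -showacls    # shows which queue operations the current user is allowed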