# Download Hive, the MySQL connector, Spark, and the TPC benchmark tools
os=$(uname -m)
if [[ $os = 'x86_64' ]]; then
yum install -y java-1.8.0-openjdk-devel.x86_64
wget -P /opt http://fastmr.oss-cn-shenzhen.aliyuncs.com/bigdata/x86/TPC.tar.gz
wget -P /opt http://fastmr.oss-cn-shenzhen.aliyuncs.com/bigdata/x86/apache-hive-2.3.7-bin.tar.gz
wget -P /opt http://fastmr.oss-cn-shenzhen.aliyuncs.com/bigdata/x86/mysql-connector-java-8.0.26.jar
wget -P /opt http://fastmr.oss-cn-shenzhen.aliyuncs.com/bigdata/x86/spark-3.2.1-bin-hadoop3.2.tgz
wget -P /root http://fastmr.oss-cn-shenzhen.aliyuncs.com/bigdata/x86/tpcds-kit.tar.gz
cd /opt && tar -xf TPC.tar.gz
cd /opt && tar -xf apache-hive-2.3.7-bin.tar.gz
cd /opt && tar -xf spark-3.2.1-bin-hadoop3.2.tgz
cd /root && tar -xf tpcds-kit.tar.gz
rm -rf /opt/apache-hive-2.3.7-bin/conf/hive-site.xml
cp /root/trans/config/hive/hive-site.xml /opt/apache-hive-2.3.7-bin/conf/
cp /root/trans/config/hive/hive-site.xml /opt/spark-3.2.1-bin-hadoop3.2/conf
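# The MySQL Connector/J jar downloaded above is not yet on Hive's classpath; a
# MySQL-backed metastore normally needs it under Hive's lib directory:
cp /opt/mysql-connector-java-8.0.26.jar /opt/apache-hive-2.3.7-bin/lib/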
echo -e "export SPARK_HOME=/opt/spark-3.2.1-bin-hadoop3.2 \n" >>/etc/profile.d/env.sh
echo -e "export HIVE_HOME=/opt/apache-hive-2.3.7-bin \n" >>/etc/profile.d/env.sh
else
yum install -y java-1.8.0-openjdk-devel.aarch64
wget -P /opt http://fastmr.oss-cn-shenzhen.aliyuncs.com/bigdata/arm/TPC.tar.gz
wget -P /opt http://fastmr.oss-cn-shenzhen.aliyuncs.com/bigdata/arm/apache-hive-3.1.2-bin.tar.gz
wget -P /opt http://fastmr.oss-cn-shenzhen.aliyuncs.com/bigdata/arm/spark-3.2.1-bin-hadoop3.2.tgz
wget -P /root http://fastmr.oss-cn-shenzhen.aliyuncs.com/bigdata/arm/tpcds-kit.tar.gz
cd /opt && tar -xf TPC.tar.gz
cd /opt && tar -xf apache-hive-3.1.2-bin.tar.gz
cd /opt && tar -xf spark-3.2.1-bin-hadoop3.2.tgz
cd /root && tar -xf tpcds-kit.tar.gz
rm -rf /opt/apache-hive-3.1.2-bin/conf/hive-site.xml
cp /root/trans/config/hive/hive-site.xml /opt/apache-hive-3.1.2-bin/conf/
cp /root/trans/config/hive/hive-site.xml /opt/spark-3.2.1-bin-hadoop3.2/conf
echo "export SPARK_HOME=/opt/spark-3.2.1-bin-hadoop3.2" >> /etc/profile.d/env.sh
echo "export HIVE_HOME=/opt/apache-hive-3.1.2-bin" >> /etc/profile.d/env.sh
fi
echo -e "export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk \n" >>/etc/profile.d/env.sh
echo -e "export PATH=\$PATH:\$JAVA_HOME/bin:\$SPARK_HOME/bin:\$HIVE_HOME/bin \n" >>/etc/profile.d/env.sh
source /etc/profile.d/env.sh
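# Optional sanity check that the environment resolved: both commands should
# print a version banner.
java -version
$SPARK_HOME/bin/spark-submit --version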
# Edit hive-site.xml (use $HIVE_HOME so this works for both the x86 and ARM Hive versions)
# Set hive.execution.engine from mr to spark; the value sits on the line after the property name,
# so match the name line and substitute on the next one (piping grep into `sed -i` does not edit the file)
sed -i '/hive.execution.engine/{n;s/mr/spark/}' $HIVE_HOME/conf/hive-site.xml
# Point hive.metastore.warehouse.dir at a local file:/// path
sed -i '/hive.metastore.warehouse.dir/{n;s|/user/hive/warehouse|file:///tmp/hive/warehouse|}' $HIVE_HOME/conf/hive-site.xml
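# Verify both edits took effect; expected values are "spark" and the file:/// path:
grep -A1 -E 'hive.execution.engine|hive.metastore.warehouse.dir' $HIVE_HOME/conf/hive-site.xml
# Note: a copy of hive-site.xml also sits in $SPARK_HOME/conf; if Spark should
# pick up the same settings, repeat the two sed edits on that copy as well.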
# Edit /opt/spark-3.2.1-bin-hadoop3.2/conf/spark-defaults.conf with the settings below
#driver
spark.driver.cores 4
spark.driver.memory 13g
spark.driver.maxResultSize 10g
#executor
spark.executor.instances 24
spark.executor.memory 13g
spark.executor.cores 4
spark.executor.memoryOverhead 1g
#shuffle
spark.task.maxFailures 4
spark.default.parallelism 288
spark.sql.files.minPartitionNum 640
spark.sql.shuffle.partitions 192
spark.shuffle.compress true
spark.shuffle.spill.compress true
#sql
spark.sql.broadcastTimeout 3600
spark.sql.files.maxPartitionBytes 256MB
spark.sql.parquet.compression.codec snappy
spark.sql.rankLimit.enabled true
spark.sql.adaptive.enabled true
spark.sql.autoBroadcastJoinThreshold 128MB
spark.sql.adaptive.autoBroadcastJoinThreshold 128MB
spark.sql.adaptive.advisoryPartitionSizeInBytes 32MB
spark.sql.adaptive.coalescePartitions.minPartitionSize 4MB
spark.sql.adaptive.coalescePartitions.minPartitionNum 1200
spark.sql.adaptive.coalescePartitions.initialPartitionNum 2400
spark.sql.adaptive.coalescePartitions.enabled true
spark.sql.adaptive.localShuffleReader.enabled true
spark.sql.adaptive.skewJoin.enabled true
spark.sql.adaptive.skewJoin.skewedPartitionFactor 5
spark.sql.adaptive.skewJoin.skewedPartitionThresholdInBytes 128m
spark.sql.optimizer.runtimeFilter.semiJoinReduction.enabled true
spark.sql.optimizer.runtime.bloomFilter.enabled true
spark.sql.optimizer.runtime.bloomFilter.creationSideThreshold 50MB
spark.sql.optimizer.runtimeFilter.number.threshold 10
#DF
spark.sql.optimizer.dynamicFilterPruning.enabled true
spark.sql.optimizer.dynamicPartitionPruning.enabled true
spark.sql.optimizer.dynamicDataPruning.enabled true
spark.sql.optimizer.dynamicPartitionPruning.useStats true
spark.sql.optimizer.dynamicPartitionPruning.fallbackFilterRatio 0.5
spark.sql.optimizer.dynamicPartitionPruning.reuseBroadcastOnly true
spark.sql.optimizer.dynamicDataPruning.pruningSideThreshold 10GB
#cbo
spark.sql.cbo.enabled true
spark.sql.cbo.joinReorder.enabled true
spark.sql.cbo.planStats.enabled true
spark.sql.cbo.starSchemaDetection true
spark.sql.cbo.joinReorder.card.weight 0.6
spark.sql.cbo.joinReorder.ga.enabled true
#other
spark.io.compression.codec snappy
spark.kryoserializer.buffer 640k
spark.memory.storageFraction 0.5
spark.shuffle.file.buffer 64k
spark.kryoserializer.buffer.max 2000m
spark.serializer org.apache.spark.serializer.KryoSerializer
spark.memory.fraction 0.6
spark.network.timeout 3600s
spark.locality.wait 0s
spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version 2
#log
spark.eventLog.enabled true
spark.eventLog.dir hdfs://master1:9000/sparklogs
#OSS
spark.hadoop.fs.oss.endpoint oss-cn-beijing-internal.aliyuncs.com
spark.hadoop.fs.oss.accessKeyId <your accessKeyId>
spark.hadoop.fs.oss.accessKeySecret <your accessKeySecret>
#warehouse
spark.sql.warehouse.dir file:///tmp/spark/warehouse
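# A minimal sketch of applying the properties above: copy the bundled template
# and append the block with a heredoc (only the first two properties are shown
# here; paste the full block between the markers). Tune the driver/executor
# sizing to your own cluster.
cd /opt/spark-3.2.1-bin-hadoop3.2/conf && cp spark-defaults.conf.template spark-defaults.conf
cat >> spark-defaults.conf <<'EOF'
spark.driver.cores 4
spark.driver.memory 13g
EOF
# spark.eventLog.dir must exist before jobs run; create it once on HDFS:
hdfs dfs -mkdir -p hdfs://master1:9000/sparklogs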
# Copy the jars Spark needs to access OSS (run this on every node)
cp /opt/hadoop-3.3.1/share/hadoop/tools/lib/aliyun-sdk-oss-3.4.1.jar /opt/spark-3.2.1-bin-hadoop3.2/jars/
cp /opt/hadoop-3.3.1/share/hadoop/tools/lib/hadoop-aliyun-3.3.1.jar /opt/spark-3.2.1-bin-hadoop3.2/jars/
cp /opt/hadoop-3.3.1/share/hadoop/tools/lib/jdom-1.1.jar /opt/spark-3.2.1-bin-hadoop3.2/jars/
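# The exact jar versions under tools/lib vary by Hadoop release; if a cp above
# fails, list what is actually shipped and adjust the file names:
ls /opt/hadoop-3.3.1/share/hadoop/tools/lib/ | grep -Ei 'aliyun|oss|jdom'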
# On the master node, copy the Hadoop core-site.xml into the Spark conf directory
cp /opt/hadoop-3.3.1/etc/hadoop/core-site.xml /opt/spark-3.2.1-bin-hadoop3.2/conf/
# Start the Spark standalone cluster
/opt/spark-3.2.1-bin-hadoop3.2/sbin/start-master.sh
/opt/spark-3.2.1-bin-hadoop3.2/sbin/start-workers.sh
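# Optional check that the daemons came up: jps should list Master here and
# Worker on each worker node.
jps | grep -E 'Master|Worker'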
# Run the TPC-DS data generation and queries from the master node
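# Arguments to TPCDS_Bench_DataGen below, as this fastmr build appears to take
# them: data path, database name, scale factor in GB (1000 = 1 TB), file format.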
$SPARK_HOME/bin/spark-submit \
--class com.databricks.spark.sql.perf.tpcds.TPCDS_Bench_DataGen \
--master spark://bigdata-forshell0:7077 \
spark-sql-perf_2.12-0.5.1-SNAPSHOT.jar \
oss://fastmr-tianchi-beijing/fk/input/ tpcds_1t 1000 parquet
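# Arguments to TPCDS_Bench_RunAllQuery below, by the same reading: which queries
# to run ("all"), the data path, the database name, and the result output path.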
$SPARK_HOME/bin/spark-submit \
--class com.databricks.spark.sql.perf.tpcds.TPCDS_Bench_RunAllQuery \
--master spark://bigdata-forshell0:7077 \
spark-sql-perf_2.12-0.5.1-SNAPSHOT.jar \
all oss://fastmr-tianchi-beijing/fk/input/ tpcds_1t oss://fastmr-tianchi-beijing/fk/output/