接上篇:https://developer.aliyun.com/article/1623254?spm=a2c6h.13148508.setting.18.66e24f0etlssu8
dim_product_data
# 设置参数 output_file = 'dim_product_data.txt' # 定义产品ID和产品名称 products = [ ('P001', 'Smartphone'), ('P002', 'Laptop'), ('P003', 'Tablet'), ('P004', 'Smartwatch'), ('P005', 'Camera'), ('P006', 'Headphones'), ('P007', 'Monitor'), ('P008', 'Keyboard'), ('P009', 'Mouse'), ('P010', 'Printer') ] # 生成数据 with open(output_file, 'w') as f: for product_id, product_name in products: line = f"{product_id},{product_name}\n" f.write(line) print(f"Product data has been written to {output_file}")
生成数据如下图所示:
dim_region_data
# 设置参数 output_file = 'dim_region_data.txt' # 定义区域ID和区域名称 regions = [ ('R001', 'North America'), ('R002', 'Europe'), ('R003', 'Asia'), ('R004', 'South America'), ('R005', 'Africa'), ('R006', 'Australia'), ('R007', 'Antarctica') ] # 生成数据 with open(output_file, 'w') as f: for region_id, region_name in regions: line = f"{region_id},{region_name}\n" f.write(line) print(f"Region data has been written to {output_file}")
生成的数据如下图所示:
kylin_examples.sql
-- 创建订单数据库、表结构 create database if not exists `wzk_kylin`; -- 1、销售表:dw_sales -- id 唯一标识 -- date1 日期 -- channelId 渠道ID -- productId 产品ID -- regionId 区域ID -- amount 数量 -- price 金额 create table wzk_kylin.dw_sales( id string, date1 string, channelId string, productId string, regionId string, amount int, price double )ROW FORMAT DELIMITED FIELDS TERMINATED BY ','; -- 2、渠道表:dim_channel -- channelId 渠道ID -- channelName 渠道名称 create table wzk_kylin.dim_channel( channelId string, channelName string )ROW FORMAT DELIMITED FIELDS TERMINATED BY ','; -- 3、产品表:dim_product create table wzk_kylin.dim_product( productId string, productName string )ROW FORMAT DELIMITED FIELDS TERMINATED BY ','; --4、区域表:dim_region create table wzk_kylin.dim_region( regionId string, regionName string )ROW FORMAT DELIMITED FIELDS TERMINATED BY ','; -- 导入数据 LOAD DATA LOCAL INPATH '/opt/wzk/kylin_test/dw_sales_data.txt' OVERWRITE INTO TABLE wzk_kylin.dw_sales; LOAD DATA LOCAL INPATH '/opt/wzk/kylin_test/dim_channel_data.txt' OVERWRITE INTO TABLE wzk_kylin.dim_channel; LOAD DATA LOCAL INPATH '/opt/wzk/kylin_test/dim_product_data.txt' OVERWRITE INTO TABLE wzk_kylin.dim_product; LOAD DATA LOCAL INPATH '/opt/wzk/kylin_test/dim_region_data.txt' OVERWRITE INTO TABLE wzk_kylin.dim_region;
运行数据
我们需要把刚才的数据上传到指定目录上,/opt/wzk/目录下。
cd /opt/wzk/kylin_test • 1
我已经上传到服务器上了:
SQL文件也记得上传上去
执行Hive:
hive -f kylin_examples.sql • 1
执行结果如下图所示:
测试数据
我们需要启动Hive
hive • 1
执行结果如下图所示:
执行如下的指令:
use wzk_kylin; select date1, sum(price) as total_money, sum(amount) as total_amount from dw_sales group by date1;
执行结果如下图所示: