简介
目标是让云上数据仓库用户及开发者通过简单的步骤体验基于AnalyticDB MySQL版和DMS构建云原生数据仓库的主要流程,场景将通过实例的开通、结构与数据的初始化、报表的开发、报表可视化等环节,用3个具体的应用场景来体验AnalyticDB MySQL版在新零售场景下的交互查询和ETL计算速度,以及通过DMS进行数据仓库数据报表开发的流程。
提供的数据集是一个零售场景的模拟数据,包括客户信息、订单记录、货物信息、国家地域信息等内容,数据总量10GB,最大数据表记录数为5999万条。
产品简介
云原生数据仓库AnalyticDB MySQL版是一种支持高并发低延时查询的新一代云原生数据仓库,高度兼容MySQL协议以及SQL:2003 语法标准,可以对海量数据进行即时的多维分析透视和业务探索,快速构建企业云上数据仓库。
数据管理DMS是基于阿里巴巴集团十余年的数据库服务平台的云版本,提供免安装、免运维、即开即用、多种数据库类型与多种环境统一的web数据库管理终端;可以为企业用户快速复制搭建与阿里集团同等安全、高效、规范的数据库DevOps研发流程、数仓开发解决方案。
创建高权限账号
ADB集群详情页,按需自定义高权限的账号与密码
每个集群仅限1个高权限账号,可修改密码;
后续可通过DMS新建普通账号(ADB实例在DMS上直接以免费的自由操作管控模式使用即可)
地址:https://ads.console.aliyun.com/adb/cn-shenzhen/instances
选择已创建ADB实例所在的地域,并点击实例名称。
创建数据库
数据库库名可按需自定义。
地址:https://ads.console.aliyun.com/adb/cn-shenzhen/instances
选择已创建ADB实例所在的地域,并点击实例名称。
创建OSS外表
说明:
下列建表语句需要根据ADB的地域分别替换endpoint和URL参数中的字符串。例如ADB在北京区域。
需要将endpoint ”oss-cn-shenzhen-internal.aliyuncs.com“ 替换成”oss-cn-beijing-internal.aliyuncs.com“,
需要将URL中的”oss://adb-tpch-shenzhen“ 替换成”oss://adb-tpch-beijing“
各个地域替换的代码:
深圳:shenzhen
北京:beijing
杭州:hangzhou
上海:shanghai
张家口:zhangjiakou
注意:下列建表语句需要根据您云账号的accessid、accesskey 替换后方执行。
路径:全部功能-数据方案-数据导入
初始化表结构语句如下:(注,脚本需要本地编辑替换相关信息后,保存为一个.sql后缀的文本用于工单的提交)
####1、CUSTOMER 外表
CREATE TABLE `OSS_CUSTOMER` (
`C_CUSTKEY` int NOT NULL,
`C_NAME` varchar NOT NULL,
`C_ADDRESS` varchar NOT NULL,
`C_NATIONKEY` int NOT NULL,
`C_PHONE` varchar NOT NULL,
`C_ACCTBAL` decimal(12, 2) NOT NULL,
`C_MKTSEGMENT` varchar NOT NULL,
`C_COMMENT` varchar NOT NULL
,`dummy` varchar
) engine='oss'
TABLE_PROPERTIES='{
"endpoint":"oss-cn-shenzhen-internal.aliyuncs.com",
"accessid":"xxxxxxxx",
"accesskey":"xxxxxxxx",
"delimiter":"|",
"URL":"oss://adb-tpch-shenzhen/tpch/10g/customer/customer.tbl"
}';
####2、ORDERS外表
CREATE TABLE `OSS_ORDERS` (
`O_ORDERKEY` bigint NOT NULL,
`O_CUSTKEY` int NOT NULL,
`O_ORDERSTATUS` varchar NOT NULL,
`O_TOTALPRICE` decimal(12, 2) NOT NULL,
`O_ORDERDATE` date NOT NULL,
`O_ORDERPRIORITY` varchar NOT NULL,
`O_CLERK` varchar NOT NULL,
`O_SHIPPRIORITY` int NOT NULL,
`O_COMMENT` varchar NOT NULL
,`dummy` varchar
) engine='oss'
TABLE_PROPERTIES='{
"endpoint":"oss-cn-shenzhen-internal.aliyuncs.com",
"accessid":"xxxxxxxx",
"accesskey":"xxxxxxxx",
"delimiter":"|",
"URL":"oss://adb-tpch-shenzhen/tpch/10g/orders/orders.tbl"
}';
####3、LINEITEM外表
CREATE TABLE `OSS_LINEITEM` (
`L_ORDERKEY` bigint NOT NULL,
`L_PARTKEY` int NOT NULL,
`L_SUPPKEY` int NOT NULL,
`L_LINENUMBER` bigint NOT NULL,
`L_QUANTITY` decimal(12, 2) NOT NULL,
`L_EXTENDEDPRICE` decimal(12, 2) NOT NULL,
`L_DISCOUNT` decimal(12, 2) NOT NULL,
`L_TAX` decimal(12, 2) NOT NULL,
`L_RETURNFLAG` varchar NOT NULL,
`L_LINESTATUS` varchar NOT NULL,
`L_SHIPDATE` date NOT NULL,
`L_COMMITDATE` date NOT NULL,
`L_RECEIPTDATE` date NOT NULL,
`L_SHIPINSTRUCT` varchar NOT NULL,
`L_SHIPMODE` varchar NOT NULL,
`L_COMMENT` varchar NOT NULL
,`dummy` varchar
) engine='oss'
TABLE_PROPERTIES='{
"endpoint":"oss-cn-shenzhen-internal.aliyuncs.com",
"accessid":"xxxxxxxx",
"accesskey":"xxxxxxxx",
"delimiter":"|",
"URL":"oss://adb-tpch-shenzhen/tpch/10g/lineitem/lineitem.tbl"
}';
####4、NATION外表
CREATE TABLE `OSS_NATION` (
`N_NATIONKEY` int NOT NULL,
`N_NAME` varchar NOT NULL,
`N_REGIONKEY` int NOT NULL,
`N_COMMENT` varchar
,`dummy` varchar
) engine='oss'
TABLE_PROPERTIES='{
"endpoint":"oss-cn-shenzhen-internal.aliyuncs.com",
"accessid":"xxxxxxxx",
"accesskey":"xxxxxxxx",
"delimiter":"|",
"URL":"oss://adb-tpch-shenzhen/tpch/10g/nation/nation.tbl"
}';
####5、PART外表
CREATE TABLE `OSS_PART` (
`P_PARTKEY` int NOT NULL,
`P_NAME` varchar NOT NULL,
`P_MFGR` varchar NOT NULL,
`P_BRAND` varchar NOT NULL,
`P_TYPE` varchar NOT NULL,
`P_SIZE` int NOT NULL,
`P_CONTAINER` varchar NOT NULL,
`P_RETAILPRICE` decimal(12, 2) NOT NULL,
`P_COMMENT` varchar NOT NULL
,`dummy` varchar
) engine='oss'
TABLE_PROPERTIES='{
"endpoint":"oss-cn-shenzhen-internal.aliyuncs.com",
"accessid":"xxxxxxxx",
"accesskey":"xxxxxxxx",
"delimiter":"|",
"URL":"oss://adb-tpch-shenzhen/tpch/10g/part/part.tbl"
}';
####6、PARTSUPP外表
CREATE TABLE `OSS_PARTSUPP` (
`PS_PARTKEY` int NOT NULL,
`PS_SUPPKEY` int NOT NULL,
`PS_AVAILQTY` int NOT NULL,
`PS_SUPPLYCOST` decimal(12, 2) NOT NULL,
`PS_COMMENT` varchar NOT NULL
,`dummy` varchar
) engine='oss'
TABLE_PROPERTIES='{
"endpoint":"oss-cn-shenzhen-internal.aliyuncs.com",
"accessid":"xxxxxxxx",
"accesskey":"xxxxxxxx",
"delimiter":"|",
"URL":"oss://adb-tpch-shenzhen/tpch/10g/partsupp/partsupp.tbl"
}';
####7、REGION外表
CREATE TABLE `OSS_REGION` (
`R_REGIONKEY` int NOT NULL,
`R_NAME` varchar NOT NULL,
`R_COMMENT` varchar
,`dummy` varchar
) engine='oss'
TABLE_PROPERTIES='{
"endpoint":"oss-cn-shenzhen-internal.aliyuncs.com",
"accessid":"xxxxxxxx",
"accesskey":"xxxxxxxx",
"delimiter":"|",
"URL":"oss://adb-tpch-shenzhen/tpch/10g/region/region.tbl"
}';
####8、SUPPLIER外表
CREATE TABLE `OSS_SUPPLIER` (
`S_SUPPKEY` int NOT NULL,
`S_NAME` varchar NOT NULL,
`S_ADDRESS` varchar NOT NULL,
`S_NATIONKEY` int NOT NULL,
`S_PHONE` varchar NOT NULL,
`S_ACCTBAL` decimal(12, 2) NOT NULL,
`S_COMMENT` varchar NOT NULL
,`dummy` varchar
) engine='oss'
TABLE_PROPERTIES='{
"endpoint":"oss-cn-shenzhen-internal.aliyuncs.com",
"accessid":"xxxxxxxx",
"accesskey":"xxxxxxxx",
"delimiter":"|",
"URL":"oss://adb-tpch-shenzhen/tpch/10g/supplier/supplier.tbl"
}';
创建ADB表
创建ADB表
步骤同OSS外表创建
地址:https://dms.aliyun.com/
路径:全部功能-数据方案-数据导入
初始化表结构语句如下:(注,脚本需要拷贝到本地编辑,保存为一个.sql后缀的文本用于工单的提交)
#### CUSTOMER表
CREATE TABLE `CUSTOMER` (
`C_CUSTKEY` int NOT NULL,
`C_NAME` varchar NOT NULL,
`C_ADDRESS` varchar NOT NULL,
`C_NATIONKEY` int NOT NULL,
`C_PHONE` varchar NOT NULL,
`C_ACCTBAL` decimal(12, 2) NOT NULL,
`C_MKTSEGMENT` varchar NOT NULL,
`C_COMMENT` varchar NOT NULL,
primary key (c_custkey)
)
DISTRIBUTE BY HASH(`c_custkey`)
INDEX_ALL='Y';
#### ORDERS表
CREATE TABLE `ORDERS` (
`O_ORDERKEY` bigint NOT NULL,
`O_CUSTKEY` int NOT NULL,
`O_ORDERSTATUS` varchar NOT NULL,
`O_TOTALPRICE` decimal(12, 2) NOT NULL,
`O_ORDERDATE` date NOT NULL,
`O_ORDERPRIORITY` varchar NOT NULL,
`O_CLERK` varchar NOT NULL,
`O_SHIPPRIORITY` int NOT NULL,
`O_COMMENT` varchar NOT NULL,
primary key (o_orderkey,o_orderdate)
)
DISTRIBUTE BY HASH(`o_orderkey`)
PARTITION BY VALUE(`date_format(O_ORDERDATE, '%Y%m')`)
LIFECYCLE 90
INDEX_ALL='Y';
#### LINEITEM表
CREATE TABLE `LINEITEM` (
`L_ORDERKEY` bigint NOT NULL,
`L_PARTKEY` int NOT NULL,
`L_SUPPKEY` int NOT NULL,
`L_LINENUMBER` bigint NOT NULL,
`L_QUANTITY` decimal(12, 2) NOT NULL,
`L_EXTENDEDPRICE` decimal(12, 2) NOT NULL,
`L_DISCOUNT` decimal(12, 2) NOT NULL,
`L_TAX` decimal(12, 2) NOT NULL,
`L_RETURNFLAG` varchar NOT NULL,
`L_LINESTATUS` varchar NOT NULL,
`L_SHIPDATE` date NOT NULL,
`L_COMMITDATE` date NOT NULL,
`L_RECEIPTDATE` date NOT NULL,
`L_SHIPINSTRUCT` varchar NOT NULL,
`L_SHIPMODE` varchar NOT NULL,
`L_COMMENT` varchar NOT NULL,
primary key (l_orderkey,l_linenumber,l_shipdate)
)
DISTRIBUTE BY HASH(`l_orderkey`)
PARTITION BY VALUE(`date_format(l_shipdate, '%Y%m')`)
LIFECYCLE 90
INDEX_ALL='Y';
#### NATION表
CREATE TABLE `NATION` (
`N_NATIONKEY` int NOT NULL,
`N_NAME` varchar NOT NULL,
`N_REGIONKEY` int NOT NULL,
`N_COMMENT` varchar,
primary key (n_nationkey)
) DISTRIBUTE BY BROADCAST INDEX_ALL='Y';
#### PART表
CREATE TABLE `PART` (
`P_PARTKEY` int NOT NULL,
`P_NAME` varchar NOT NULL,
`P_MFGR` varchar NOT NULL,
`P_BRAND` varchar NOT NULL,
`P_TYPE` varchar NOT NULL,
`P_SIZE` int NOT NULL,
`P_CONTAINER` varchar NOT NULL,
`P_RETAILPRICE` decimal(12, 2) NOT NULL,
`P_COMMENT` varchar NOT NULL,
primary key (p_partkey)
)
DISTRIBUTE BY HASH(`p_partkey`)
INDEX_ALL='Y';
#### PARTSUPP表
CREATE TABLE `PARTSUPP` (
`PS_PARTKEY` int NOT NULL,
`PS_SUPPKEY` int NOT NULL,
`PS_AVAILQTY` int NOT NULL,
`PS_SUPPLYCOST` decimal(12, 2) NOT NULL,
`PS_COMMENT` varchar NOT NULL,
primary key (ps_partkey,ps_suppkey)
)
DISTRIBUTE BY HASH(`ps_partkey`)
INDEX_ALL='Y';
#### REGION表
CREATE TABLE `REGION` (
`R_REGIONKEY` int NOT NULL,
`R_NAME` varchar NOT NULL,
`R_COMMENT` varchar,
primary key (r_regionkey)
)
DISTRIBUTE BY BROADCAST
INDEX_ALL='Y';
#### SUPPLIER表
CREATE TABLE `SUPPLIER` (
`S_SUPPKEY` int NOT NULL,
`S_NAME` varchar NOT NULL,
`S_ADDRESS` varchar NOT NULL,
`S_NATIONKEY` int NOT NULL,
`S_PHONE` varchar NOT NULL,
`S_ACCTBAL` decimal(12, 2) NOT NULL,
`S_COMMENT` varchar NOT NULL,
primary key (s_suppkey)
)
DISTRIBUTE BY HASH(`s_suppkey`)
INDEX_ALL='Y';
转储OSS的数据到ADB中
步骤同建表操作
路径:全部功能-数据方案-数据导入
初始化脚本如下(注,脚本需要拷贝到本地编辑,保存为一个.sql后缀的文本用于工单的提交):
###1、CUSTOMER表
### 记录数:150W,存储空间:234M,导入耗时:1分钟
INSERT INTO CUSTOMER
(C_CUSTKEY,C_NAME,C_ADDRESS,C_NATIONKEY,C_PHONE,C_ACCTBAL,C_MKTSEGMENT,C_COMMENT)
SELECT C_CUSTKEY,C_NAME,C_ADDRESS,C_NATIONKEY,C_PHONE,C_ACCTBAL,C_MKTSEGMENT,C_COMMENT FROM OSS_CUSTOMER;
###2、ORDERS表
### 记录数:1500W,存储空间:1.6GB,导入耗时:10分钟
INSERT INTO ORDERS
(O_ORDERKEY,O_CUSTKEY,O_ORDERSTATUS,O_TOTALPRICE,O_ORDERDATE,O_ORDERPRIORITY,O_CLERK,O_SHIPPRIORITY,O_COMMENT)
SELECT O_ORDERKEY,O_CUSTKEY,O_ORDERSTATUS,O_TOTALPRICE,O_ORDERDATE,O_ORDERPRIORITY,O_CLERK,O_SHIPPRIORITY,O_COMMENT FROM OSS_ORDERS;
###3、LINEITEM表
### 记录数:5999W,存储空间:7.24GB,导入耗时:40分钟
INSERT INTO LINEITEM
(L_ORDERKEY,L_PARTKEY,L_SUPPKEY,L_LINENUMBER,L_QUANTITY,L_EXTENDEDPRICE,L_DISCOUNT
,L_TAX,L_RETURNFLAG,L_LINESTATUS,L_SHIPDATE,L_COMMITDATE,L_RECEIPTDATE,L_SHIPINSTRUCT,L_SHIPMODE,L_COMMENT)
SELECT L_ORDERKEY,L_PARTKEY,L_SUPPKEY,L_LINENUMBER,L_QUANTITY,L_EXTENDEDPRICE,L_DISCOUNT
,L_TAX,L_RETURNFLAG,L_LINESTATUS,L_SHIPDATE,L_COMMITDATE,L_RECEIPTDATE,L_SHIPINSTRUCT,L_SHIPMODE,L_COMMENT FROM OSS_LINEITEM;
###4、NATION表
### 记录数:25,存储空间:2KB,导入耗时:1秒
INSERT INTO NATION
(N_NATIONKEY,N_NAME,N_REGIONKEY,N_COMMENT)
SELECT N_NATIONKEY,N_NAME,N_REGIONKEY,N_COMMENT FROM OSS_NATION;
###5、PART表
### 记录数:200W,存储空间:232M,导入耗时:23秒
INSERT INTO PART
(P_PARTKEY,P_NAME,P_MFGR,P_BRAND,P_TYPE,P_SIZE,P_CONTAINER,P_RETAILPRICE,P_COMMENT)
SELECT P_PARTKEY,P_NAME,P_MFGR,P_BRAND,P_TYPE,P_SIZE,P_CONTAINER,P_RETAILPRICE,P_COMMENT FROM OSS_PART;
###6、PARTSUPP表
### 记录数:800W,存储空间:1.12GB,导入耗时:3分钟
INSERT INTO PARTSUPP
(PS_PARTKEY,PS_SUPPKEY,PS_AVAILQTY,PS_SUPPLYCOST,PS_COMMENT)
SELECT PS_PARTKEY,PS_SUPPKEY,PS_AVAILQTY,PS_SUPPLYCOST,PS_COMMENT FROM OSS_PARTSUPP;
###7、REGION表
### 记录数:5,存储空间:0.38KB,导入耗时:10秒
INSERT INTO REGION
(R_REGIONKEY,R_NAME,R_COMMENT)
SELECT R_REGIONKEY,R_NAME,R_COMMENT from OSS_REGION;
###8、SUPPLIER表
### 记录数:10W,存储空间:14M,导入耗时:10秒
INSERT INTO SUPPLIER
(S_SUPPKEY,S_NAME,S_ADDRESS,S_NATIONKEY,S_PHONE,S_ACCTBAL,S_COMMENT)
SELECT S_SUPPKEY,S_NAME,S_ADDRESS,S_NATIONKEY,S_PHONE,S_ACCTBAL,S_COMMENT FROM OSS_SUPPLIER;
导入数据验证
地址:https://dms.aliyun.com/
路径:全部功能-SQLConsole-单库查询
此处选最大的1个表的数据导入结果进行确认,LINEITEM表得到预期数值与导入数据量一致,为5999W
select count(*) from LINEITEM;