1 实验用到的表及定义
1) emp表
MySQL [test]> show create table emp;
CREATE TABLE `emp` (
`id` bigint(11) NOT NULL AUTO_INCREMENT,
`name` varchar(255) DEFAULT NULL,
`dept_no` int(11) DEFAULT NULL,
`salary` bigint(10) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE = InnoDB AUTO_INCREMENT = 100005 DEFAULT CHARSET = utf8
PARTITION BY KEY(`id`)
PARTITIONS 2;
2) dept_b表,是广播表,dept表列定义相同,是非广播表
MySQL [test]> show create table dept_b;
CREATE TABLE `dept_b` (
`dept_no` tinyint(4) NOT NULL,
`dept_name` varchar(20) DEFAULT NULL,
`loc` varchar(30) DEFAULT NULL,
PRIMARY KEY (`dept_no`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8
BROADCAST
3)emp_bonus表
MySQL [test]> show create table emp_bonus;
CREATE TABLE `emp_bonus` (
`emp_id` bigint(20) NOT NULL,
`bonus` decimal(10, 0) DEFAULT NULL,
PRIMARY KEY (`emp_id`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8
PARTITION BY KEY(`emp_id`)
PARTITIONS 2;
4)emp_bonus_tg2,定义同emp_bonus,设置了非默认tablegroup
MySQL [test]> show create table emp_bonus_tg2;
CREATE TABLE `emp_bonus_tg2` (
`emp_id` bigint(20) NOT NULL,
`bonus` decimal(10, 0) DEFAULT NULL,
PRIMARY KEY (`emp_id`)
) ENGINE = InnoDB DEFAULT CHARSET = utf8
PARTITION BY KEY(`emp_id`)
PARTITIONS 2
tablegroup = `test_tg`;
2 多表连接查询时的执行计划(同时使用tablegroup和广播表)
1) emp,emp_bonus在一个表组内,dept_b是广播表,整个join都下推到了存储节点执行。
MySQL [test]> explain select a.name, a.salary, b.bonus,c.dept_name from emp a, emp_bonus b,dept_b c
where a.id=b.emp_id and a.dept_no=c.dept_no and a.id in (20, 6);
LOGICAL EXECUTIONPLAN
Gather(concurrent=true)
LogicalView(tables="emp[p1,p2],emp_bonus[p1,p2],dept_b", shardCount=2,
sql="SELECT `emp`.`name`, `emp`.`salary`, `emp_bonus`.`bonus`, `dept_b`.`dept_name`
FROM `emp` AS `emp` INNER JOIN `emp_bonus` AS `emp_bonus`
ON ((`emp`.`id` = `emp_bonus`.`emp_id`) AND (`emp`.`id` IN(?, ?)))
INNER JOIN `dept_b` AS `dept_b` ON (`emp`.`dept_no` = `dept_b`.`dept_no`)")
HitCache:false
Source:PLAN_CACHE
TemplateId: 8224055c
2) emp,emp_bonus在一个表组内,dept为非广播表,emp和emp_bonus的连接操作被下推到存储节点,这两个表join后的结果再同dept在计算节点进行join。
MySQL [test]> explain select a.name, a.salary, b.bonus,c.dept_name from emp a, emp_bonus b,dept c
where a.id=b.emp_id and a.dept_no=c.dept_no and a.id in (20, 6);
LOGICAL EXECUTIONPLAN
Project(name="name", salary="salary", bonus="bonus", dept_name="dept_name")
BKAJoin(condition="dept_no = dept_no", type="inner")
Gather(concurrent=true)
LogicalView(tables="emp[p1,p2],emp_bonus[p1,p2]", shardCount=2,
sql="SELECT `emp`.`name`, `emp`.`dept_no`, `emp`.`salary`, `emp_bonus`.`bonus`
FROM `emp` AS `emp` INNER JOIN `emp_bonus` AS `emp_bonus`
ON ((`emp`.`id` = `emp_bonus`.`emp_id`) AND (`emp`.`id` IN(?, ?)))")
Gather(concurrent=true)
LogicalView(tables="dept[p1,p2,p3,...p16]", shardCount=16,
sql="SELECT `dept_no`, `dept_name` FROM `dept` AS `dept`
WHERE (`dept_no` IN (...))")
HitCache:true
Source:PLAN_CACHE
TemplateId: 04a2abdf
3)emp,emp_bonus不在同一个tablegroup内,dept_b为广播表,emp同dept_b的连接操作下推到存储节点,这两个表的join结果同emp_bonus_tg2表在计算节点做连接。
MySQL [test]> explain select a.name, a.salary, b.bonus,c.dept_name from emp a, emp_bonus_tg2 b,dept_b c
where a.id=b.emp_id and a.dept_no=c.dept_no and a.id in (20, 6);
LOGICAL EXECUTIONPLAN
Project(name="name", salary="salary", bonus="bonus", dept_name="dept_name")
BKAJoin(condition="emp_id = id", type="inner")
Gather(concurrent=true)
LogicalView(tables="emp[p1,p2],dept_b", shardCount=2,
sql="SELECT `emp`.`id`, `emp`.`name`, `emp`.`salary`, `dept_b`.`dept_name` FROM
`emp` AS `emp` INNER JOIN `dept_b` AS `dept_b` ON ((`emp`.`dept_no` = `dept_b`.`dept_no`)
AND (`emp`.`id` IN(?, ?)))")
Gather(concurrent=true)
LogicalView(tables="emp_bonus_tg2[p1,p2]", shardCount=2, sql="SELECT `emp_id`, `bonus` FROM
`emp_bonus_tg2` AS `emp_bonus_tg2` WHERE ((`emp_id` IN(?, ?)) AND (`emp_id` IN (...)))")
HitCache:false
Source:PLAN_CACHE
TemplateId: 5b5bf1fc
3 同一tablegroup下多个表的连接
1) 新建一个表,采用同EMP相同的分区键类型及分区策略
CREATE TABLE `emp_info` (
`id` bigint NOT NULL,
`address` varchar(40) DEFAULT NULL,
`telephone` int DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb3
PARTITION BY KEY(`id`)
PARTITIONS 2;
2) 导入部分数据
MySQL [test]> select count(*) from emp_info;
+----------+
| count(*) |
+----------+
| 149 |
+----------+
1 row in set (0.08 sec)
3) 检查一下table group状态,通过tg1 中已经有三个表,新建的表默认加入到与其分区一致的表组中。
MySQL [test]> show tablegroup where TABLE_GROUP_NAME like 'tg1';
+--------------+----------------+------------------+----------+--------------+------------------+--------------+--------------+------------+-+
| TABLE_SCHEMA | TABLE_GROUP_ID | TABLE_GROUP_NAME | IS_MANUAL_CREATE | CUR_PART_KEY | MAX_PART_KEY | PART_COUNT | TABLE_COUNT | INDEX_COUNT |
+--------------+----------------+------------------+----------+--------------+------------------+--------------+--------------+------------+-+
| test | 1 | tg1 | 0 | KEY(BIGINT) | KEY(BIGINT) | 2 | 3 | 0 |
+--------------+----------------+------------------+----------+--------------+------------------+--------------+--------------+------------+-+
1 row in set (0.09 sec)
3) 看一下执行计划,整个连接操作被下推到存储节点执行
MySQL [test]> explain select a.name, a.salary, b.bonus,c.address from emp a, emp_bonus b,emp_info c where a.id=b.emp_id and a.id=c.id and a.name='Jaine Seavers';
LOGICAL EXECUTIONPLAN
Gather(concurrent=true)
LogicalView(tables="emp[p1,p2],emp_bonus[p1,p2],emp_info[p1,p2]", shardCount=2,
sql="SELECT `emp`.`name`, `emp`.`salary`, `emp_bonus`.`bonus`, `emp_info`.`address`
FROM `emp` AS `emp` INNER JOIN `emp_bonus` AS `emp_bonus` ON (`emp`.`id` = `emp_bonus`.`emp_id`)
INNER JOIN `emp_info` AS `emp_info` ON ((`emp_bonus`.`emp_id` = `emp_info`.`id`)
AND (`emp`.`id` = `emp_info`.`id`))") |
HitCache:true
Source:PLAN_CACHE
TemplateId: 294ed663
4 分析及结论
PolarDB-X可以充分利用表组及广播表,尽可能多的将操作下推到存储节点执行,新建表自动键入分区策略一致的表组这个特性可以减轻开发及运维负担。