MySQL存储引擎性能压测
一、表结构:
CREATE TABLE `huayu_tokudb` (
`waybill_id` bigint(20) NOT NULL AUTO_INCREMENT COMMENT '?????',
`waybill_no` varchar(20) NOT NULL COMMENT '???',
`post_date` datetime NOT NULL COMMENT '????????',
`order_id` varchar(50) DEFAULT NULL COMMENT '?????????',
`batch_no` varchar(50) DEFAULT NULL COMMENT '???',
`biz_product_id` varchar(20) NOT NULL COMMENT '??????',
`biz_product_no` varchar(20) NOT NULL COMMENT '??????',
`biz_product_name` varchar(50) NOT NULL COMMENT '??????',
`sender_type` varchar(20) DEFAULT NULL COMMENT '0 ?? 1????',
`sender_id` varchar(50) DEFAULT NULL COMMENT '??????',
`sender_no` varchar(50) DEFAULT NULL COMMENT '??????(??????)',
`sender` varchar(200) DEFAULT NULL COMMENT '??????',
`sender_linker` varchar(200) DEFAULT NULL,
`sender_fixtel` varchar(50) DEFAULT NULL COMMENT '??????',
`sender_mobile` varchar(50) DEFAULT NULL COMMENT '??????',
`sender_addr` varchar(200) DEFAULT NULL COMMENT '??????',
`sender_country_no` varchar(20) DEFAULT NULL COMMENT '??????',
`sender_country_name` varchar(50) DEFAULT NULL COMMENT '??????',
`sender_province_no` varchar(20) DEFAULT NULL COMMENT '??????',
`sender_province_name` varchar(50) DEFAULT NULL COMMENT '??????',
`sender_city_no` varchar(20) DEFAULT NULL COMMENT '??????',
`sender_city_name` varchar(50) DEFAULT NULL COMMENT '??????',
`sender_county_no` varchar(20) DEFAULT NULL COMMENT '??????',
`sender_county_name` varchar(50) DEFAULT NULL COMMENT '??????',
`sender_notes` varchar(1000) DEFAULT NULL COMMENT '????',
`receiver_no` varchar(50) DEFAULT NULL COMMENT '??????????????',
`receiver` varchar(200) DEFAULT NULL COMMENT '??????',
`receiver_linker` varchar(200) DEFAULT NULL,
`receiver_fixtel` varchar(50) DEFAULT NULL COMMENT '??????',
`receiver_mobile` varchar(50) DEFAULT NULL COMMENT '??????',
`receiver_addr` varchar(200) DEFAULT NULL COMMENT '??????',
`receiver_country_no` varchar(20) DEFAULT NULL COMMENT '??????',
`receiver_country_name` varchar(50) DEFAULT NULL COMMENT '??????',
`receiver_province_no` varchar(20) DEFAULT NULL COMMENT '??????',
`receiver_province_name` varchar(50) DEFAULT NULL COMMENT '??????',
`receiver_city_no` varchar(20) DEFAULT NULL COMMENT '??????',
`receiver_city_name` varchar(50) DEFAULT NULL COMMENT '??????',
`receiver_county_no` varchar(20) DEFAULT NULL COMMENT '??????',
`receiver_county_name` varchar(50) DEFAULT NULL COMMENT '??????',
`insurance_flag` char(1) DEFAULT NULL COMMENT '????????????1:?? 2:?? 3:??',
`insurance_amount` decimal(12,2) DEFAULT NULL COMMENT '??????',
`pickup_type` varchar(20) DEFAULT NULL COMMENT '?????0 ???????1 ??????',
`payment_mode` varchar(20) DEFAULT NULL COMMENT '????(???) 1:??? 2:??? 3:??? 4:??????? 5:?? 6:?/??? 7:???',
`real_weight` decimal(8,0) DEFAULT NULL COMMENT '????',
`fee_weight` decimal(8,0) DEFAULT NULL,
`volume_weight` decimal(8,0) DEFAULT NULL,
`length` decimal(8,0) DEFAULT NULL,
`width` decimal(8,0) DEFAULT NULL,
`height` decimal(8,0) DEFAULT NULL,
`quantity` int(11) DEFAULT NULL,
`packaging` varchar(20) DEFAULT NULL,
`package_material` varchar(20) DEFAULT NULL,
`goods_desc` varchar(200) DEFAULT NULL,
`contents_quantity` int(11) DEFAULT NULL,
`cod_flag` char(1) DEFAULT NULL,
`cod_amount` decimal(12,2) DEFAULT NULL,
`receipt_flag` char(1) DEFAULT NULL,
`receipt_waybill_no` varchar(20) DEFAULT NULL,
`receipt_fee_amount` decimal(12,2) DEFAULT NULL,
`insurance_premium_amount` decimal(12,2) DEFAULT NULL,
`valuable_flag` char(1) DEFAULT NULL,
`cargo_total_price` decimal(12,2) DEFAULT NULL,
`cargo_total_purchasing_price` decimal(12,2) DEFAULT NULL,
`allow_fee_flag` char(1) DEFAULT NULL,
`is_feed_flag` char(1) DEFAULT NULL,
`manual_fee_type` char(1) DEFAULT NULL,
`fee_date` datetime DEFAULT NULL,
`discount_rate` decimal(6,2) DEFAULT NULL,
`settlement_mode` varchar(20) DEFAULT NULL,
`payment_state` char(1) DEFAULT NULL,
`payment_date` datetime DEFAULT NULL,
`payment_id` varchar(50) DEFAULT NULL,
`manage_org_code` varchar(20) DEFAULT NULL,
`postage_suite_code` varchar(20) DEFAULT NULL,
`fee_area_suite_code` varchar(20) DEFAULT NULL,
`fee_area_code` varchar(20) DEFAULT NULL,
`fee_area_name` varchar(20) DEFAULT NULL,
`is_advance_flag` char(1) DEFAULT NULL,
`deliver_type` char(1) DEFAULT NULL,
`deliver_sign` varchar(50) DEFAULT NULL,
`deliver_date` char(1) DEFAULT NULL,
`deliver_notes` varchar(1000) DEFAULT NULL,
`deliver_pre_date` date DEFAULT NULL,
`battery_flag` char(1) DEFAULT NULL,
`is_jinguan` varchar(20) DEFAULT NULL,
`workbench` varchar(20) DEFAULT NULL,
`electronic_preferential_no` varchar(50) DEFAULT NULL,
`electronic_preferential_amount` decimal(12,2) DEFAULT NULL,
`pickup_attribute` char(1) DEFAULT NULL,
`adjust_type` varchar(20) DEFAULT NULL,
`postage_revoke` decimal(12,2) DEFAULT NULL,
`print_flag` char(1) DEFAULT NULL,
`print_date` datetime DEFAULT NULL,
`print_times` int(11) DEFAULT NULL,
`declare_source` varchar(20) DEFAULT NULL,
`declare_type` varchar(20) DEFAULT NULL,
`declare_curr_code` varchar(20) DEFAULT NULL,
`create_user_name` varchar(50) DEFAULT NULL,
`modify_user_name` varchar(50) DEFAULT NULL,
`volume` decimal(8,0) DEFAULT NULL COMMENT '??',
`contents_type_no` varchar(20) DEFAULT NULL COMMENT '??????',
`contents_type_name` varchar(200) DEFAULT NULL COMMENT '??????',
`contents_weight` decimal(8,0) DEFAULT NULL COMMENT '??????',
`transfer_type` varchar(20) DEFAULT NULL COMMENT '????',
`postage_total` decimal(12,2) NOT NULL COMMENT '???=????+????',
`postage_standard` decimal(12,2) DEFAULT NULL COMMENT '????',
`postage_paid` decimal(12,2) DEFAULT NULL COMMENT '????',
`postage_other` decimal(12,2) DEFAULT NULL COMMENT '????',
`is_deleted` char(1) DEFAULT '0' COMMENT '?????\n0??\n1??',
`create_user_id` bigint(20) DEFAULT NULL COMMENT '???id',
`gmt_created` datetime DEFAULT NULL COMMENT '????',
`modify_user_id` bigint(20) DEFAULT NULL COMMENT '???id',
`gmt_modified` datetime DEFAULT NULL COMMENT '????',
`reserved1` bigint(20) DEFAULT NULL COMMENT '????1',
`reserved2` bigint(20) DEFAULT NULL COMMENT '????2',
`reserved3` bigint(20) DEFAULT NULL COMMENT '????3',
`reserved4` varchar(200) DEFAULT NULL COMMENT '????4',
`reserved5` varchar(200) DEFAULT NULL COMMENT '????5',
`reserved6` varchar(200) DEFAULT NULL COMMENT '????6',
`reserved7` varchar(200) DEFAULT NULL COMMENT '????7',
`reserved8` varchar(200) DEFAULT NULL COMMENT '????8',
`reserved9` datetime DEFAULT NULL COMMENT '????9',
`reserved10` text COMMENT '????10',
`logistics_order_no` bigint(50) DEFAULT NULL,
`inner_channel` varchar(20) DEFAULT NULL,
`base_product_id` bigint(20) DEFAULT NULL,
`base_product_no` varchar(20) DEFAULT NULL,
`base_product_name` varchar(20) DEFAULT NULL,
`is_special_marketing` char(1) DEFAULT NULL,
`product_type` varchar(20) DEFAULT NULL,
`biz_product_type` varchar(20) DEFAULT NULL,
`product_reach_area` char(1) DEFAULT NULL,
`contents_attribute` char(1) DEFAULT NULL,
`contents_cargo_no` varchar(1000) DEFAULT NULL,
`cmd_code` varchar(20) DEFAULT NULL,
`manual_charge_reason` varchar(200) DEFAULT NULL,
`time_limit` char(1) DEFAULT NULL,
`io_type` varchar(20) DEFAULT NULL,
`ecommerce_no` varchar(20) DEFAULT NULL,
`waybill_type` varchar(20) DEFAULT NULL,
`pre_waybill_no` varchar(50) DEFAULT NULL,
`post_batch_id` varchar(50) DEFAULT NULL,
`biz_occur_date` datetime DEFAULT NULL,
`post_org_id` bigint(20) DEFAULT NULL,
`post_org_no` varchar(50) DEFAULT NULL,
`org_drds_code` varchar(50) DEFAULT NULL,
`post_org_simple_name` varchar(50) DEFAULT NULL,
`post_org_product_name` varchar(20) DEFAULT NULL,
`post_person_id` bigint(20) DEFAULT NULL,
`post_person_no` varchar(50) DEFAULT NULL,
`post_person_name` varchar(50) DEFAULT NULL,
`post_person_mobile` varchar(50) DEFAULT NULL,
`sender_warehouse_id` bigint(20) DEFAULT NULL,
`sender_warehouse_name` varchar(200) DEFAULT NULL,
`sender_safety_code` varchar(50) DEFAULT NULL,
`sender_im_type` varchar(20) DEFAULT NULL,
`sender_im_id` varchar(50) DEFAULT NULL,
`sender_id_type` varchar(20) DEFAULT NULL,
`sender_id_no` varchar(50) DEFAULT NULL,
`sender_id_encrypted_code` varchar(50) DEFAULT NULL,
`sender_agent_id_type` varchar(20) DEFAULT NULL,
`sender_agent_id_no` varchar(50) DEFAULT NULL,
`sender_id_encrypted_code_agent` varchar(50) DEFAULT NULL,
`sender_addr_additional` varchar(200) DEFAULT NULL,
`sender_district_no` varchar(20) DEFAULT NULL,
`sender_postcode` varchar(20) DEFAULT NULL,
`sender_gis` varchar(20) DEFAULT NULL,
`registered_customer_no` varchar(50) DEFAULT NULL,
`receiver_type` char(1) DEFAULT NULL,
`receiver_id` bigint(20) DEFAULT NULL,
`receiver_warehouse_id` bigint(20) DEFAULT NULL,
`receiver_warehouse_name` varchar(200) DEFAULT NULL,
`receiver_safety_code` varchar(50) DEFAULT NULL,
`receiver_im_type` varchar(20) DEFAULT NULL,
`receiver_im_id` varchar(50) DEFAULT NULL,
`receiver_addr_additional` varchar(200) DEFAULT NULL,
`receiver_district_no` varchar(20) DEFAULT NULL,
`receiver_postcode` varchar(20) DEFAULT NULL,
`receiver_gis` varchar(20) DEFAULT NULL,
`receiver_notes` varchar(1000) DEFAULT NULL,
`customer_manager_id` bigint(20) DEFAULT NULL,
`customer_manager_no` varchar(50) DEFAULT NULL,
`customer_manager_name` varchar(50) DEFAULT NULL,
`salesman_id` bigint(20) DEFAULT NULL,
`salesman_no` varchar(50) DEFAULT NULL,
`salesman_name` varchar(50) DEFAULT NULL,
`order_weight` decimal(8,0) DEFAULT NULL,
`post_org_name` varchar(200) DEFAULT NULL,
PRIMARY KEY (`waybill_id`),
KEY `auto_shard_key_post_date` (`post_date`),
KEY `auto_shard_key_waybill_no` (`waybill_no`)
) ENGINE=InnoDB row_format=compressed AUTO_INCREMENT=568964219 DEFAULT CHARSET=utf8 COMMENT='???????';
二、innodb表的数据大小:
mysql> select concat(round(sum(data_length/1024/1024/1024),3),'G') from information_schema.tables where table_name='qps_waybill_base_02';
+------------------------------------------------------+
| concat(round(sum(data_length/1024/1024/1024),3),'G') |
+------------------------------------------------------+
| 18.636G |
+------------------------------------------------------+
1 row in set (0.01 sec)
mysql> select count(*) from qps_waybill_base_02;
+----------+
| count(*) |
+----------+
| 15861881 |
+----------+
1 row in set (5.91 sec)
mysql>
三、存储引擎压缩说明
压缩的理念:
通过提高CPU利用率和节约成本,降低数据库容量及I/O负载,从而使数据吞吐率得到显著提高
压缩表减少了磁盘上数据库的大小,使得用户不必频繁地操作写入和读取便可以访问数据。对于 InnoDB的工作量以及传统的用户表而言(特别是在某些读取密集型的应用中,内存有足够的空间存储常用数据),数据压缩不仅大大减少了数据库所需的存储空间,而且还减少了 I/O的工作量,提高了数据吞吐率,从而节约开销处理成本。节省存储成本固然重要,但是减少 I/O成本更为关键。
通常情况下表结构中包含字符型数据列如char, varchar, text或blob等时,具有较高的压缩率,而一些二进制数据,如整形或浮点型数据列,和一些已经压缩的多媒体文档,如jpeg、jpg、png等格式图片及mp4、avi等格式视频,其压缩率都不会好,再对其进行压缩就是纯粹浪费CPU资源
1、tokudb存储引擎
阿里云数据库 MySQL 5.6 版支持通过 TokuDB 存储引擎压缩数据。经过大量测试表明,数据表从 InnoDB 存储引擎转到 TokuDB 存储引擎后,数据量可以减少 80% 到 90%,除了数据压缩外,TokuDB 存储引擎还支持事务和在线 DDL 操作,可以很好兼容运行于 MyISAM 或 InnoDB 存储引擎上的应用。
Tokudb压缩模式:
tokudb_fast: 使用quicklz 库的压缩模式。
tokudb_small: 使用 lzma 库的压缩模式。
tokudb_zlib: (默认)使用 zlib 库的压缩模式,提供了中等级别的压缩比和中等级别的CPU消耗。
tokudb_quicklz: 使用 quicklz 库的压缩模式, 提供了轻量级的压缩比和较低基本的CPU消耗。
tokudb_lzma: 使用lzma库压缩模式,提供了高压缩比和高CPU消耗。
tokudb_uncompressed: 不使用压缩模式。
TokuDB默认压缩算法为zlib,相比而言压缩的性价比非常高。
TokuDB 限制说明
· TokuDB 存储引擎无法支持外键 Foreign Key。
· TokuDB 存储引擎不适用于频繁大量读取的场景。
2、innodb存储引擎
innodb压缩的内部实现
压缩算法
压缩算法采用LZ77,在这个算法下,如果压缩效率好点的话,压缩后的大小和未压缩的数据大小比如在25-50%左右,在这种情况下就会有效地通过消耗一些CPU来减少IO操作,增大吞吐量,可以通过调节压缩程度(innodb_compression_level参数)来权衡压缩比和CPU使用率
innodb_compression_level:默认值为6,可选值0-9,数值越大表示压缩程度越大,消耗的CPU也越多
InnodbDB 限制说明
把innodb_file_per_table设置为1,innodb_file_format需要设置为Barracuda。然后在建新表或修改现有表的语句中加入row_format=compressed key_block_size=8就可以了。也可以仅加入row_format=compressed,这样key_block_size就取默认值8KB了。也可以仅加入key_block_size={1/2/4/8/16},也会默认开启压缩。
3、myisam存储引擎
myisam压缩后的表是不能进行修改的(除非先将表解除压缩,修改数据,然后再次压缩)。压缩表可以极大减少磁盘空间占用,减少磁盘I/O,提升查询性能。
压缩表支持索引,但索引也是只读的,可以使用myisampack对MyISAM表进行压缩(也叫打包)。
因为环境无法创建myisam表,会自动更改为innodb表,所以对于myisam表的压缩能力就没有测试
四、测试结果
1、转换成tokudb后表数据的大小:
mysql> select concat(round(sum(data_length/1024/1024/1024),3),'G') from information_schema.tables where table_name='huayu_tokudb';
+------------------------------------------------------+
| concat(round(sum(data_length/1024/1024/1024),3),'G') |
+------------------------------------------------------+
| 3.502G |
+------------------------------------------------------+
1 row in set (0.01 sec)
mysql> select count(*) from huayu_tokudb;
+----------+
| count(*) |
+----------+
| 15861881 |
+----------+
1 row in set (3.08 sec)
压缩比:(18.636-3.502)/18.636=81%
2、innodb压缩后,压缩级别为6
mysql> select concat(round(sum(data_length/1024/1024/1024),3),'G') from information_schema.tables where table_name='huayu_innodb_compressed';
+------------------------------------------------------+
| concat(round(sum(data_length/1024/1024/1024),3),'G') |
+------------------------------------------------------+
| 4.325G |
+------------------------------------------------------+
1 row in set (0.00 sec)
mysql>
mysql> select count(*) from huayu_innodb_compressed;
+----------+
| count(*) |
+----------+
| 15861881 |
+----------+
1 row in set (5.25 sec)
压缩比:(18.636-4.325)/18.636=76%
3、innodb压缩级别改为9之后,insert的时间比较长,但是数据反而比级别为6的时候更大了。
mysql> select concat(round(sum(data_length/1024/1024/1024),3),'G') from information_schema.tables where table_name='huayu_innodb_compressed2';
+------------------------------------------------------+
| concat(round(sum(data_length/1024/1024/1024),3),'G') |
+------------------------------------------------------+
| 4.508G |
+------------------------------------------------------+
1 row in set (0.00 sec)
mysql> select count(*) from huayu_innodb_compressed2;
+----------+
| count(*) |
+----------+
| 15861881 |
+----------+
1 row in set (5.11 sec)
将压缩级别改为2:影响不大
mysql> select concat(round(sum(data_length/1024/1024/1024),3),'G') from information_schema.tables where table_name='huayu_tokudb';
+------------------------------------------------------+
| concat(round(sum(data_length/1024/1024/1024),3),'G') |
+------------------------------------------------------+
| 4.479G |
+------------------------------------------------------+
1 row in set (0.01 sec)
4、修改tokudb的buffer大小 loose_tokudb_buffer_pool_ratio=70 占用buffer pool的70%,与之前未修改时数据的占用空间大小是一样的
mysql> select concat(round(sum(data_length/1024/1024/1024),3),'G') from information_schema.tables where table_name='huayu_tokudb2';
+------------------------------------------------------+
| concat(round(sum(data_length/1024/1024/1024),3),'G') |
+------------------------------------------------------+
| 3.502G |
+-----------------------------------------------------+
1 row in set (0.01 sec)
5、修改tokudb默认的压缩模式(tokudb_row_format),由默认的tokudb_zlib,改为tokudb_fast,压缩能力不如默认的zlib库
mysql> select concat(round(sum(data_length/1024/1024/1024),3),'G') from information_schema.tables where table_name='huayu_tokudb2';
+------------------------------------------------------+
| concat(round(sum(data_length/1024/1024/1024),3),'G') |
+------------------------------------------------------+
| 4.347G |
+------------------------------------------------------+
压缩比:(18.636-4.347)/18.636=77%
6、修改tokudb压缩模式为tokudb_small,small库压缩能力超过默认的zlib库
mysql> select concat(round(sum(data_length/1024/1024/1024),3),'G') from information_schema.tables where table_name='huayu_tokudb2';
+------------------------------------------------------+
| concat(round(sum(data_length/1024/1024/1024),3),'G') |
+------------------------------------------------------+
| 2.947G |
+------------------------------------------------------+
1 row in set (0.00 sec)
压缩比:(18.636-2.947)/18.636=84%
7、修改tokudb压缩模式为tokudb_quicklz,使用quicklz库的轻量级压缩模式,确实压缩后数据量比较多,按时消耗的CPU消耗较低
mysql> select concat(round(sum(data_length/1024/1024/1024),3),'G') from information_schema.tables where table_name='huayu_tokudb2';
+------------------------------------------------------+
| concat(round(sum(data_length/1024/1024/1024),3),'G') |
+------------------------------------------------------+
| 4.347G |
+------------------------------------------------------+
1 row in set (0.00 sec)
压缩比:(18.636-4.347)/18.636=77%
8、修改tokudb压缩模式为tokudb_lzma,使用lzma库进行压缩,提供高压缩消耗高的cpu,这个模式同small模式的压缩能力相同
mysql> select concat(round(sum(data_length/1024/1024/1024),3),'G') from information_schema.tables where table_name='huayu_tokudb2';
+------------------------------------------------------+
| concat(round(sum(data_length/1024/1024/1024),3),'G') |
+------------------------------------------------------+
| 2.947G |
+------------------------------------------------------+
1 row in set (0.01 sec)
压缩比:(18.636-2.947)/18.636=84%
9、最后是tokudb_uncompressed模式,这两种结果是通过
1、修改已有数据的表的压缩模式,然后optimize table 的结果
2、新建相同的表,修改压缩模式参数后,插入数据对应的结果
mysql> select concat(round(sum(data_length/1024/1024/1024),3),'G') from information_schema.tables where table_name='huayu_tokudb2';
+------------------------------------------------------+
| concat(round(sum(data_length/1024/1024/1024),3),'G') |
+------------------------------------------------------+
| 16.431G |
+------------------------------------------------------+
1 row in set (0.00 sec)
mysql> select concat(round(sum(data_length/1024/1024/1024),3),'G') from information_schema.tables where table_name='huayu_tokudb2';
+------------------------------------------------------+
| concat(round(sum(data_length/1024/1024/1024),3),'G') |
+------------------------------------------------------+
| 16.187G |
+------------------------------------------------------+
1 row in set (0.00 sec)
测试结果概览:
innodb表源数据大小 |
innodb压缩(级别6) |
innodb压缩(级别9) |
innodb压缩(级别2) |
|
18.636G |
4.325G |
4.508G |
4.479G |
|
tokudb(tokudb_zlib) |
tokudb(tokudb_zlib)设置tokudb_buffer_poo70% |
tokudb(tokudb_fast) |
tokudb(tokudb_small) |
|
3.502G |
3.502G |
4.347G |
2.947G |
|
tokudb(tokudb_quicklz) |
tokudb(tokudb_lzma) |
tokudb(tokudb_uncompressed) |
|
|
4.347G |
2.947G |
16.431G |
|
五、个人总结
如果CPU有较高的性能空间,而且内存中有足够的空间来缓存数据,那么建议使用存储引擎压缩的功能。如果表中的数据不是大量的读取数据的,那么建议使用tokudb存储引擎进行压缩,性能提升会非常明显,但是tokudb不支持外键。否则,可以使用innodb存储引擎进行压缩。myisam存储引擎就先不考虑了,因为它不支持事务,并发低,并且数据存储也不安全。