概述
MergeTree引擎
- 一.MergeTree引擎
一.MergeTree引擎
MergeTree系列的表引擎是clickhouse数据存储功能的核心,它们提供了用于弹性和高性能数据检索的大多数功能:列存储,自定义分区,稀疏的主索引,辅助数据跳过索引等
主要特点:
- 存储按主键排序的数据(必须有order by(主键,…))
- 如果指定了分区键,则可以使用分区
- 数据复制支持
- 数据采样支持
create table tb_merge_tree(
uid UInt8,
name String,
birthday Date,
city String,
gender String
)engine=MergeTree()
primary key uid
order by (uid,birthday);
insert into tb_merge_tree values
(1,'zs',toDate(now()),'HZ','M'),
(2,'ls','2021-09-27','HZ','M'),
(3,'ww','2021-08-18','SH','F'),
(4,'zl','1999-09-27','JS','F');
select * from tb_merge_tree;
┌─uid─┬─name─┬───birthday─┬─city─┬─gender─┐
│
1 │ zs
│ 2021-08-30 │ HZ
│ M
│
│
2 │ ls
│ 2021-09-27 │ HZ
│ M
│
│
3 │ ww
│ 2021-08-18 │ SH
│ F
│
│
4 │ zl
│ 1999-09-27 │ JS
│ F
│
└─────┴──────┴────────────┴──────┴────────┘
**看一下底层存储**
[root@spark-140 test1]# cd tb_merge_tree/
[root@spark-140 tb_merge_tree]# ll
total 12
drwxr-x--- 2 root root 4096 Aug 30 17:18 all_1_1_0
//存着表的所有数据
drwxr-x--- 2 root root 4096 Aug 30 17:18 detached
-rw-r----- 1 root root
1 Aug 30 17:18 format_version.txt
[root@spark-140 tb_merge_tree]# cd all_1_1_0/
[root@spark-140 all_1_1_0]# ll
total 28
-rw-r----- 1 root root 187 Aug 30 17:18 checksums.txt
//数据安全校验
-rw-r----- 1 root root 109 Aug 30 17:18 columns.txt
//列信息文件,存放列字段信息
-rw-r----- 1 root root
1 Aug 30 17:18 count.txt
//记录当前数据的条数
-rw-r----- 1 root root 174 Aug 30 17:18 data.bin
//数据文件
-rw-r----- 1 root root 176 Aug 30 17:18 data.mrk3
//列字段标记文件
-rw-r----- 1 root root
10 Aug 30 17:18 default_compression_codec.txt
-rw-r----- 1 root root
2 Aug 30 17:18 primary.idx
//一级索引文件,用于存放稀疏索引
[root@spark-140 all_1_1_0]# cat count.txt4
insert into tb_merge_tree values
(1,'zs',toDate(now()),'HZ','M'),
(2,'ls','2021-09-27','HZ','M'),
(3,'ww','2021-08-18','SH','F'),
(4,'zl','1999-09-27','JS','F');
//再次执行一次插入操作
select * from tb_merge_tree;
┌─uid─┬─name─┬───birthday─┬─city─┬─gender─┐
│
1 │ zs
│ 2021-08-30 │ HZ
│ M
│
│
2 │ ls
│ 2021-09-27 │ HZ
│ M
│
│
3 │ ww
│ 2021-08-18 │ SH
│ F
│
│
4 │ zl
│ 1999-09-27 │ JS
│ F
│
└─────┴──────┴────────────┴──────┴────────┘
┌─uid─┬─name─┬───birthday─┬─city─┬─gender─┐
│
1 │ zs
│ 2021-08-30 │ HZ
│ M
│
│
2 │ ls
│ 2021-09-27 │ HZ
│ M
│
│
3 │ ww
│ 2021-08-18 │ SH
│ F
│
│
4 │ zl
│ 1999-09-27 │ JS
│ F
│
└─────┴──────┴────────────┴──────┴────────┘
//主键没有唯一要求,是可以重复的
[root@spark-140 tb_merge_tree]# ll
total 16
drwxr-x--- 2 root root 4096 Aug 30 17:18 all_1_1_0
drwxr-x--- 2 root root 4096 Aug 30 17:26 all_2_2_0
//第二次插入操作的文件
drwxr-x--- 2 root root 4096 Aug 30 17:18 detached
-rw-r----- 1 root root
1 Aug 30 17:18 format_version.txt
[root@spark-140 tb_merge_tree]# cd all_2_2_0/
[root@spark-140 all_2_2_0]# ll
total 28
-rw-r----- 1 root root 187 Aug 30 17:26 checksums.txt
-rw-r----- 1 root root 109 Aug 30 17:26 columns.txt
-rw-r----- 1 root root
1 Aug 30 17:26 count.txt
-rw-r----- 1 root root 174 Aug 30 17:26 data.bin
-rw-r----- 1 root root 176 Aug 30 17:26 data.mrk3
-rw-r----- 1 root root
10 Aug 30 17:26 default_compression_codec.txt
-rw-r----- 1 root root
2 Aug 30 17:26 primary.idx
[root@spark-140 all_2_2_0]# cat count.txt
4
**合并这两个数据块**
optimize table tb_merge_tree [final];
//final关键字是强制合并的意思,用于多个表合并时
select * from tb_merge_tree;
┌─uid─┬─name─┬───birthday─┬─city─┬─gender─┐
│
1 │ zs
│ 2021-08-30 │ HZ
│ M
│
│
1 │ zs
│ 2021-08-30 │ HZ
│ M
│
│
2 │ ls
│ 2021-09-27 │ HZ
│ M
│
│
2 │ ls
│ 2021-09-27 │ HZ
│ M
│
│
3 │ ww
│ 2021-08-18 │ SH
│ F
│
│
3 │ ww
│ 2021-08-18 │ SH
│ F
│
│
4 │ zl
│ 1999-09-27 │ JS
│ F
│
│
4 │ zl
│ 1999-09-27 │ JS
│ F
│
└─────┴──────┴────────────┴──────┴────────┘
这时再看一下底层存储
[root@spark-140 tb_merge_tree]# ll
total 20
drwxr-x--- 2 root root 4096 Aug 30 17:18 all_1_1_0
drwxr-x--- 2 root root 4096 Aug 30 17:31 all_1_2_1
//这是合并操作的文件
drwxr-x--- 2 root root 4096 Aug 30 17:26 all_2_2_0
drwxr-x--- 2 root root 4096 Aug 30 17:18 detached
-rw-r----- 1 root root
1 Aug 30 17:18 format_version.txt
[root@spark-140 tb_merge_tree]# cd all_1_2_1/
//all_1_2_1数字分别代表什么
1代表最低版本,2代表最高版本,1代表合并操作一次
[root@spark-140 all_1_2_1]# ll
total 28-rw-r----- 1 root root 187 Aug 30 17:31 checksums.txt
-rw-r----- 1 root root 109 Aug 30 17:31 columns.txt
-rw-r----- 1 root root
1 Aug 30 17:31 count.txt
-rw-r----- 1 root root 205 Aug 30 17:31 data.bin
-rw-r----- 1 root root 176 Aug 30 17:31 data.mrk3
-rw-r----- 1 root root
10 Aug 30 17:31 default_compression_codec.txt
-rw-r----- 1 root root
2 Aug 30 17:31 primary.idx
[root@spark-140 all_1_2_1]# cat count.txt
8
create table tb_merge_tree_partition(
uid UInt8,
name String,
birthday DateTime
)engine=MergeTree()
partition by toDate(birthday)
order by uid;
insert into tb_merge_tree_partition values
(1,'zs','2020-09-27 10:00:00'),
(2,'ls','2020-09-27 11:00:00'),
(3,'ww','2020-08-18 10:00:00'),
(4,'zl','2020-08-18 11:00:00');
select * from tb_merge_tree_partition;
┌─uid─┬─name─┬────────────birthday─┐
│
1 │ zs
│ 2020-09-27 10:00:00 │
│
2 │ ls
│ 2020-09-27 11:00:00 │
└─────┴──────┴─────────────────────┘
┌─uid─┬─name─┬────────────birthday─┐
│
3 │ ww
│ 2020-08-18 10:00:00 │
│
4 │ zl
│ 2020-08-18 11:00:00 │
└─────┴──────┴─────────────────────┘
insert into tb_merge_tree_partition values
(11,'zss','2020-09-27 10:00:00'),
(12,'lss','2020-09-27 11:00:00'),
(13,'www','2020-08-18 10:00:00'),
(14,'zll','2020-08-18 11:00:00');
select * from tb_merge_tree_partition;
┌─uid─┬─name─┬────────────birthday─┐
│
1 │ zs
│ 2020-09-27 10:00:00 │
│
2 │ ls
│ 2020-09-27 11:00:00 │
└─────┴──────┴─────────────────────┘
┌─uid─┬─name─┬────────────birthday─┐
│
3 │ ww
│ 2020-08-18 10:00:00 │
│
4 │ zl
│ 2020-08-18 11:00:00 │
└─────┴──────┴─────────────────────┘
┌─uid─┬─name─┬────────────birthday─┐
│
11 │ zss
│ 2020-09-27 10:00:00 │
│
12 │ lss
│ 2020-09-27 11:00:00 │
└─────┴──────┴─────────────────────┘
┌─uid─┬─name─┬────────────birthday─┐
│
13 │ www
│ 2020-08-18 10:00:00 │
│
14 │ zll
│ 2020-08-18 11:00:00 │
└─────┴──────┴─────────────────────┘
**看一下底层存储**
cd /data/clickhouse/data/test1/tb_merge_tree_partition
[root@spark-140 tb_merge_tree_partition]# ll
total 24
drwxr-x--- 2 root root 4096 Aug 30 19:58 20200818_2_2_0
drwxr-x--- 2 root root 4096 Aug 30 19:59 20200818_4_4_0
drwxr-x--- 2 root root 4096 Aug 30 19:58 20200927_1_1_0
drwxr-x--- 2 root root 4096 Aug 30 19:59 20200927_3_3_0
drwxr-x--- 2 root root 4096 Aug 30 19:58 detached
-rw-r----- 1 root root
1 Aug 30 19:58 format_version.txt
**合并操作**
optimize table tb_merge_tree_partition final;
select * from tb_merge_tree_partition;
┌─uid─┬─name─┬────────────birthday─┐
│
1 │ zs
│ 2020-09-27 10:00:00 │
│
2 │ ls
│ 2020-09-27 11:00:00 │
│
11 │ zss
│ 2020-09-27 10:00:00 │
│
12 │ lss
│ 2020-09-27 11:00:00 │
└─────┴──────┴─────────────────────┘
┌─uid─┬─name─┬────────────birthday─┐
│
3 │ ww
│ 2020-08-18 10:00:00 │
│
4 │ zl
│ 2020-08-18 11:00:00 │
│
13 │ www
│ 2020-08-18 10:00:00 │
│
14 │ zll
│ 2020-08-18 11:00:00 │
└─────┴──────┴─────────────────────┘
[root@spark-140 tb_merge_tree_partition]# ll
total 28
drwxr-x--- 2 root root 4096 Aug 30 19:58 20200818_2_2_0
drwxr-x--- 2 root root 4096 Aug 30 20:01 20200818_2_4_1
drwxr-x--- 2 root root 4096 Aug 30 19:59 20200818_4_4_0
drwxr-x--- 2 root root 4096 Aug 30 19:58 20200927_1_1_0
drwxr-x--- 2 root root 4096 Aug 30 20:02 20200927_1_3_1
drwxr-x--- 2 root root 4096 Aug 30 19:59 20200927_3_3_0
drwxr-x--- 2 root root 4096 Aug 30 19:58 detached
-rw-r----- 1 root root
1 Aug 30 19:58 format_version.txt
最后
以上就是超帅帽子为你收集整理的clickhouse引擎--MergeTree引擎一.MergeTree引擎的全部内容,希望文章能够帮你解决clickhouse引擎--MergeTree引擎一.MergeTree引擎所遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复