我是靠谱客的博主 喜悦美女,最近开发中收集的这篇文章主要介绍 Apache Hudi 表目录结构,觉得挺不错的,现在分享给大家,希望可以做个参考。

概述

Apache Hudi 表目录结构

记录**一部分**表目录结构和文件格式,便于对 Hudi 的设计理念和表的组织格式进行直观的理解。

数据由官网示例运行后产生。

一、COW 表类型组织逻辑

表名字为 stock_ticks_cow

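| Permission | Owner | Group | Size | Last Modified | Replication | Block Size | Name |
| --- | --- | --- | --- | --- | --- | --- | --- |
| drwxr-xr-x | root | supergroup | 0 B | Oct 15 16:20 | 0 | 0 B | .hoodie |
| drwxr-xr-x | root | supergroup | 0 B | Oct 15 15:45 | 0 | 0 B | 2018 |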

1.1 ./tablename/.hoodie

| Permission | Owner | Group | Size | Last Modified | Replication | Block Size | Name |
| --- | --- | --- | --- | --- | --- | --- | --- |
| -rw-r--r-- | root | supergroup | 968 B | Oct 15 15:45 | 3 | 128 MB | 20201015074528.rollback |
| -rw-r--r-- | root | supergroup | 0 B | Oct 15 15:45 | 3 | 128 MB | 20201015074528.rollback.inflight |
| -rw-r--r-- | root | supergroup | 2.2 KB | Oct 15 15:45 | 3 | 128 MB | 20201015074529.commit |
| -rw-r--r-- | root | supergroup | 0 B | Oct 15 15:45 | 3 | 128 MB | 20201015074529.commit.requested |
| -rw-r--r-- | root | supergroup | 350 B | Oct 15 15:45 | 3 | 128 MB | 20201015074529.inflight |
| -rw-r--r-- | root | supergroup | 2.21 KB | Oct 15 16:20 | 3 | 128 MB | 20201015082021.commit |
| -rw-r--r-- | root | supergroup | 0 B | Oct 15 16:20 | 3 | 128 MB | 20201015082021.commit.requested |
| -rw-r--r-- | root | supergroup | 1.01 KB | Oct 15 16:20 | 3 | 128 MB | 20201015082021.inflight |
| -rw-r--r-- | root | supergroup | 213 B | Oct 15 15:36 | 3 | 128 MB | hoodie.properties |
| drwxr-xr-x | root | supergroup | 0 B | Oct 15 15:36 | 0 | 0 B | .aux |
| drwxr-xr-x | root | supergroup | 0 B | Oct 15 16:20 | 0 | 0 B | .temp |
| drwxr-xr-x | root | supergroup | 0 B | Oct 15 15:36 | 0 | 0 B | archived |

1.1.1 *.rollback 文件格式

Objavro.schemaÚ
{"type":"record","name":"HoodieRollbackMetadata","namespace":"org.apache.hudi.avro.model","fields":[{"name":"startRollbackTime","type":{"type":"string","avro.java.string":"String"}},{"name":"timeTakenInMillis","type":"long"},{"name":"totalFilesDeleted","type":"int"},{"name":"commitsRollback","type":{"type":"array","items":{"type":"string","avro.java.string":"String"}}},{"name":"partitionMetadata","type":{"type":"map","values":{"type":"record","name":"HoodieRollbackPartitionMetadata","fields":[{"name":"partitionPath","type":{"type":"string","avro.java.string":"String"}},{"name":"successDeleteFiles","type":{"type":"array","items":{"type":"string","avro.java.string":"String"}}},{"name":"failedDeleteFiles","type":{"type":"array","items":{"type":"string","avro.java.string":"String"}}}]},"avro.java.string":"String"}},{"name":"version","type":["int","null"],"default":1}]}

1.1.2 *.rollback.inflight 文件格式

暂无复现场景

1.1.3 *.commit 文件格式

{
"partitionToWriteStats" : {
"2018/08/31" : [ {
"fileId" : "8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0",
"path" : "2018/08/31/8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0_0-22-22_20201015074529.parquet",
"prevCommit" : "null",
"numWrites" : 197,
"numDeletes" : 0,
"numUpdateWrites" : 0,
"numInserts" : 197,
"totalWriteBytes" : 443701,
"totalWriteErrors" : 0,
"tempPath" : null,
"partitionPath" : "2018/08/31",
"totalLogRecords" : 0,
"totalLogFilesCompacted" : 0,
"totalLogSizeCompacted" : 0,
"totalUpdatedRecordsCompacted" : 0,
"totalLogBlocks" : 0,
"totalCorruptLogBlock" : 0,
"totalRollbackBlocks" : 0,
"fileSizeInBytes" : 443701
} ]
},
"compacted" : false,
"extraMetadata" : {
"ROLLING_STAT" : "{\n  \"partitionToRollingStats\" : {\n    \"2018/08/31\" : {\n      \"8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0\" : {\n        \"fileId\" : \"8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0\",\n        \"inserts\" : 197,\n        \"upserts\" : 0,\n        \"deletes\" : 0,\n        \"totalInputWriteBytesToDisk\" : 0,\n        \"totalInputWriteBytesOnDisk\" : 443701\n      }\n    }\n  },\n  \"actionType\" : \"commit\"\n}",
"schema" : "{\"type\":\"record\",\"name\":\"stock_ticks\",\"fields\":[{\"name\":\"volume\",\"type\":\"long\"},{\"name\":\"ts\",\"type\":\"string\"},{\"name\":\"symbol\",\"type\":\"string\"},{\"name\":\"year\",\"type\":\"int\"},{\"name\":\"month\",\"type\":\"string\"},{\"name\":\"high\",\"type\":\"double\"},{\"name\":\"low\",\"type\":\"double\"},{\"name\":\"key\",\"type\":\"string\"},{\"name\":\"date\",\"type\":\"string\"},{\"name\":\"close\",\"type\":\"double\"},{\"name\":\"open\",\"type\":\"double\"},{\"name\":\"day\",\"type\":\"string\"}]}",
"deltastreamer.checkpoint.key" : "stock_ticks,0:3482"
},
"fileIdAndRelativePaths" : {
"8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0" : "2018/08/31/8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0_0-22-22_20201015074529.parquet"
},
"totalRecordsDeleted" : 0,
"totalLogRecordsCompacted" : 0,
"totalScanTime" : 0,
"totalCreateTime" : 793,
"totalUpsertTime" : 0,
"totalCompactedRecordsUpdated" : 0,
"totalLogFilesCompacted" : 0,
"totalLogFilesSize" : 0
}

1.1.4 *.commit.requested 文件格式

暂无复现场景

1.1.5 *.inflight 文件格式

{
"partitionToWriteStats" : {
"2018/08/31" : [ {
"fileId" : "8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0",
"path" : null,
"prevCommit" : "20201015074529",
"numWrites" : 0,
"numDeletes" : 0,
"numUpdateWrites" : 99,
"numInserts" : 0,
"totalWriteBytes" : 0,
"totalWriteErrors" : 0,
"tempPath" : null,
"partitionPath" : null,
"totalLogRecords" : 0,
"totalLogFilesCompacted" : 0,
"totalLogSizeCompacted" : 0,
"totalUpdatedRecordsCompacted" : 0,
"totalLogBlocks" : 0,
"totalCorruptLogBlock" : 0,
"totalRollbackBlocks" : 0,
"fileSizeInBytes" : 0
} ]
},
"compacted" : false,
"extraMetadata" : { },
"totalScanTime" : 0,
"totalCreateTime" : 0,
"totalUpsertTime" : 0,
"totalCompactedRecordsUpdated" : 0,
"totalLogFilesCompacted" : 0,
"totalLogFilesSize" : 0,
"fileIdAndRelativePaths" : {
"8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0" : null
},
"totalRecordsDeleted" : 0,
"totalLogRecordsCompacted" : 0
}

1.1.6 hoodie.properties

#Properties saved on Thu Oct 15 07:36:26 UTC 2020
#Thu Oct 15 07:36:26 UTC 2020
hoodie.table.name=stock_ticks_cow
hoodie.archivelog.folder=archived
hoodie.table.type=COPY_ON_WRITE
hoodie.timeline.layout.version=1

1.2 /tablename/part-n/pn-n/pn-n-n…

| Permission | Owner | Group | Size | Last Modified | Replication | Block Size | Name |
| --- | --- | --- | --- | --- | --- | --- | --- |
| -rw-r--r-- | root | supergroup | 93 B | Oct 15 15:45 | 3 | 128 MB | .hoodie_partition_metadata |
| -rw-r--r-- | root | supergroup | 433.3 KB | Oct 15 15:45 | 3 | 128 MB | 8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0_0-22-22_20201015074529.parquet |
| -rw-r--r-- | root | supergroup | 433.01 KB | Oct 15 16:20 | 3 | 128 MB | 8c8db0e3-964d-4a2c-b0fe-f4306d08b5c8-0_0-22-25_20201015082021.parquet |

1.2.1 .hoodie_partition_metadata 文件格式

#partition metadata
#Thu Oct 15 07:45:31 UTC 2020
commitTime=20201015074529
partitionDepth=3

1.2.2 *.parquet 文件格式

5041 5231 1504 1524 154c 4c15 0215 0400
001f 8b08 0000 0000 0000 00e3 6360 6030
3230 3230 3430 3435 3037 3135 b204 0078
5454 0112 0000 0015 0015 1415 382c 158a
0315 0415 0615 081c 180e 3230 3230 3130
3135 3037 3435 3239 180e 3230 3230 3130
3135 3037 3435 3239 1600 280e 3230 3230
3130 3135 3037 3435 3239 180e 3230 3230
......

二、MOR 类型表目录结构

表名字为 stock_ticks_mor

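| Permission | Owner | Group | Size | Last Modified | Replication | Block Size | Name |
| --- | --- | --- | --- | --- | --- | --- | --- |
| drwxr-xr-x | root | supergroup | 0 B | Oct 15 16:20 | 0 | 0 B | .hoodie |
| drwxr-xr-x | root | supergroup | 0 B | Oct 15 15:45 | 0 | 0 B | 2018 |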

2.1 /tablename/.hoodie

| Permission | Owner | Group | Size | Last Modified | Replication | Block Size | Name |
| --- | --- | --- | --- | --- | --- | --- | --- |
| -rw-r--r-- | root | supergroup | 968 B | Oct 15 15:45 | 3 | 128 MB | 20201015074553.rollback |
| -rw-r--r-- | root | supergroup | 0 B | Oct 15 15:45 | 3 | 128 MB | 20201015074553.rollback.inflight |
| -rw-r--r-- | root | supergroup | 2.21 KB | Oct 15 15:45 | 3 | 128 MB | 20201015074554.deltacommit |
| -rw-r--r-- | root | supergroup | 350 B | Oct 15 15:45 | 3 | 128 MB | 20201015074554.deltacommit.inflight |
| -rw-r--r-- | root | supergroup | 0 B | Oct 15 15:45 | 3 | 128 MB | 20201015074554.deltacommit.requested |
| -rw-r--r-- | root | supergroup | 2.26 KB | Oct 15 16:20 | 3 | 128 MB | 20201015082051.deltacommit |
| -rw-r--r-- | root | supergroup | 1.01 KB | Oct 15 16:20 | 3 | 128 MB | 20201015082051.deltacommit.inflight |
| -rw-r--r-- | root | supergroup | 0 B | Oct 15 16:20 | 3 | 128 MB | 20201015082051.deltacommit.requested |
| -rw-r--r-- | root | supergroup | 305 B | Oct 15 15:37 | 3 | 128 MB | hoodie.properties |
| drwxr-xr-x | root | supergroup | 0 B | Oct 15 15:37 | 0 | 0 B | .aux |
| drwxr-xr-x | root | supergroup | 0 B | Oct 15 15:45 | 0 | 0 B | .temp |
| drwxr-xr-x | root | supergroup | 0 B | Oct 15 15:37 | 0 | 0 B | archived |

2.1.1 *.rollback 文件格式

Objavro.schemaÚ
{"type":"record","name":"HoodieRollbackMetadata","namespace":"org.apache.hudi.avro.model","fields":[{"name":"startRollbackTime","type":{"type":"string","avro.java.string":"String"}},{"name":"timeTakenInMillis","type":"long"},{"name":"totalFilesDeleted","type":"int"},{"name":"commitsRollback","type":{"type":"array","items":{"type":"string","avro.java.string":"String"}}},{"name":"partitionMetadata","type":{"type":"map","values":{"type":"record","name":"HoodieRollbackPartitionMetadata","fields":[{"name":"partitionPath","type":{"type":"string","avro.java.string":"String"}},{"name":"successDeleteFiles","type":{"type":"array","items":{"type":"string","avro.java.string":"String"}}},{"name":"failedDeleteFiles","type":{"type":"array","items":{"type":"string","avro.java.string":"String"}}}]},"avro.java.string":"String"}},{"name":"version","type":["int","null"],"default":1}]}

2.1.2 *.rollback.inflight 文件格式

暂无复现场景

2.1.3 *.deltacommit 文件格式

{
"partitionToWriteStats" : {
"2018/08/31" : [ {
"fileId" : "c7922a25-5d97-4add-8580-127fd14aa494-0",
"path" : "2018/08/31/c7922a25-5d97-4add-8580-127fd14aa494-0_0-22-22_20201015074554.parquet",
"prevCommit" : "null",
"numWrites" : 197,
"numDeletes" : 0,
"numUpdateWrites" : 0,
"numInserts" : 197,
"totalWriteBytes" : 443699,
"totalWriteErrors" : 0,
"tempPath" : null,
"partitionPath" : "2018/08/31",
"totalLogRecords" : 0,
"totalLogFilesCompacted" : 0,
"totalLogSizeCompacted" : 0,
"totalUpdatedRecordsCompacted" : 0,
"totalLogBlocks" : 0,
"totalCorruptLogBlock" : 0,
"totalRollbackBlocks" : 0,
"fileSizeInBytes" : 443699
} ]
},
"compacted" : false,
"extraMetadata" : {
"ROLLING_STAT" : "{\n  \"partitionToRollingStats\" : {\n    \"2018/08/31\" : {\n      \"c7922a25-5d97-4add-8580-127fd14aa494-0\" : {\n        \"fileId\" : \"c7922a25-5d97-4add-8580-127fd14aa494-0\",\n        \"inserts\" : 197,\n        \"upserts\" : 0,\n        \"deletes\" : 0,\n        \"totalInputWriteBytesToDisk\" : 0,\n        \"totalInputWriteBytesOnDisk\" : 443699\n      }\n    }\n  },\n  \"actionType\" : \"deltacommit\"\n}",
"schema" : "{\"type\":\"record\",\"name\":\"stock_ticks\",\"fields\":[{\"name\":\"volume\",\"type\":\"long\"},{\"name\":\"ts\",\"type\":\"string\"},{\"name\":\"symbol\",\"type\":\"string\"},{\"name\":\"year\",\"type\":\"int\"},{\"name\":\"month\",\"type\":\"string\"},{\"name\":\"high\",\"type\":\"double\"},{\"name\":\"low\",\"type\":\"double\"},{\"name\":\"key\",\"type\":\"string\"},{\"name\":\"date\",\"type\":\"string\"},{\"name\":\"close\",\"type\":\"double\"},{\"name\":\"open\",\"type\":\"double\"},{\"name\":\"day\",\"type\":\"string\"}]}",
"deltastreamer.checkpoint.key" : "stock_ticks,0:3482"
},
"fileIdAndRelativePaths" : {
"c7922a25-5d97-4add-8580-127fd14aa494-0" : "2018/08/31/c7922a25-5d97-4add-8580-127fd14aa494-0_0-22-22_20201015074554.parquet"
},
"totalRecordsDeleted" : 0,
"totalLogRecordsCompacted" : 0,
"totalScanTime" : 0,
"totalCreateTime" : 1280,
"totalUpsertTime" : 0,
"totalCompactedRecordsUpdated" : 0,
"totalLogFilesCompacted" : 0,
"totalLogFilesSize" : 0
}

2.1.4 *.deltacommit.inflight 文件格式

{
"partitionToWriteStats" : { },
"compacted" : false,
"extraMetadata" : { },
"fileIdAndRelativePaths" : { },
"totalRecordsDeleted" : 0,
"totalLogRecordsCompacted" : 0,
"totalScanTime" : 0,
"totalCreateTime" : 0,
"totalUpsertTime" : 0,
"totalCompactedRecordsUpdated" : 0,
"totalLogFilesCompacted" : 0,
"totalLogFilesSize" : 0
}

2.1.5 *.deltacommit.requested 文件格式

暂无复现场景

2.1.6 hoodie.properties 文件格式

#Properties saved on Thu Oct 15 07:37:05 UTC 2020
#Thu Oct 15 07:37:05 UTC 2020
hoodie.compaction.payload.class=org.apache.hudi.common.model.OverwriteWithLatestAvroPayload
hoodie.table.name=stock_ticks_mor
hoodie.archivelog.folder=archived
hoodie.table.type=MERGE_ON_READ
hoodie.timeline.layout.version=1

2.2 /${tablename}/分区n/分区n-n/分区n-n-n/…

| Permission | Owner | Group | Size | Last Modified | Replication | Block Size | Name |
| --- | --- | --- | --- | --- | --- | --- | --- |
| -rw-r--r-- | root | supergroup | 21.04 KB | Oct 15 16:20 | 3 | 512 MB | .c7922a25-5d97-4add-8580-127fd14aa494-0_20201015074554.log.1_0-22-25 |
| -rw-r--r-- | root | supergroup | 93 B | Oct 15 15:45 | 3 | 128 MB | .hoodie_partition_metadata |
| -rw-r--r-- | root | supergroup | 433.3 KB | Oct 15 15:45 | 3 | 128 MB | c7922a25-5d97-4add-8580-127fd14aa494-0_0-22-22_20201015074554.parquet |

2.2.1 *.log.* 文件格式

2348 5544 4923 0000 0000 0000 541e 0000
0001 0000 0003 0000 0002 0000 0000 0000
000e 3230 3230 3130 3135 3038 3230 3531
......

2.2.2 .hoodie_partition_metadata 文件格式

#partition metadata
#Thu Oct 15 07:45:56 UTC 2020
commitTime=20201015074554
partitionDepth=3

2.2.3 *.parquet 文件格式

5041 5231 1504 1524 154c 4c15 0215 0400
001f 8b08 0000 0000 0000 00e3 6360 6030
3230 3230 3430 3435 3037 3135 3501 0002
bea4 3012 0000 0015 0015 1415 382c 158a
......

最后

以上就是喜悦美女为你收集整理的 Apache Hudi 表目录结构的全部内容,希望文章能够帮你解决 Apache Hudi 表目录结构所遇到的程序开发问题。

如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。

本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
点赞(32)

评论列表共有 0 条评论

立即
投稿
返回
顶部