一、背景
每天上百亿的日志数据实时查询是个挑战,在架构设计上采用了Kafka + Flink + Clickhouse+Redash,实现海量数据的实时分析。计算层,我们开发了基于Flink计算引擎的实时数据平台,简化开发流程,数据通过配置化实现动态Schema生成,底层数据解析统一,无需重复造轮子,整个数据链路,从数据的采集,转换,存储,可视化,无需写一行代码,配置化完成。本文主要介绍实时日志数据写入Clickhouse的实践。
Flink Clickhouse Sink
复制代码
1
2
3
4
5
6<dependency> <groupId>ru.yandex.clickhouse</groupId> <artifactId>clickhouse-jdbc</artifactId> <version>0.1.50</version> </dependency>
复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265public class ClickhouseSink extends RichSinkFunction<Row> implements Serializable { private String tablename; private String[] tableColums; private List<String> types; private String[] columns; private String username; private String password; private String[] ips; private String drivername = "ru.yandex.clickhouse.ClickHouseDriver"; private List<Row> list = new ArrayList<>(); private List<PreparedStatement> preparedStatementList = new ArrayList<>(); private List<Connection> connectionList = new ArrayList<>(); private List<Statement> statementList = new ArrayList<>(); private long lastInsertTime = 0L; private long insertCkTimenterval = 4000L; // 插入的批次 private int insertCkBatchSize = 10000; public ClickhouseSink(String tablename, String username, String password, String[] ips, String[] tableColums, List<String> types, String[] columns) { this.tablename = tablename; this.username = username; this.password = password; this.ips = ips; this.tableColums = tableColums; this.types = types; this.columns = columns; // 新增字段 } // 插入数据 public void insertData(List<Row> rows, PreparedStatement preparedStatement, Connection connection) throws SQLException { for (int i = 0; i < rows.size(); ++i) { Row row = rows.get(i); for (int j = 0; j < this.tableColums.length; ++j) { if (null != row.getField(j)) { preparedStatement.setObject(j + 1, row.getField(j)); } else { preparedStatement.setObject(j + 1, "null"); } } preparedStatement.addBatch(); } preparedStatement.executeBatch(); connection.commit(); preparedStatement.clearBatch(); } /** * 新增字段修改表添加列 * * @param statement * @throws Exception */ public void tableAddColumn(Statement statement) { try { if (null != this.columns && this.columns.length > 0) { /** * table 增加字段 */ // 获取原表字段名 String querySql = "select * from " + this.tablename + " limit 1"; ResultSet rs = statement.executeQuery(querySql); ResultSetMetaData rss = rs.getMetaData(); int columnCount = rss.getColumnCount(); List<String> orgTabCols = new ArrayList<>(); for (int i = 1; i <= columnCount; ++i) { orgTabCols.add(rss.getColumnName(i)); } // 对比两个数组,判断新增字段是否在原来的表中 Collection collection = new ArrayList<String>(orgTabCols); boolean exists = collection.removeAll(Arrays.asList(this.columns)); // 新增字段不在原来的表中,执行添加列操作 if (!exists) { for (int i = 0; i < this.columns.length; ++i) { String str = ""; String str_all = ""; StringBuilder sb = null; StringBuilder sb_all = null; if (i == 0) { sb.append("alter table " ).append(this.tablename).append(" add column ").append(this.columns[i]).append(" String").append(" after ").append(orgTabCols.get(orgTabCols.size() - 1)); sb_all.append("alter table " ).append("_all").append(this.tablename).append(" add column ").append(this.columns[i]).append(" String").append(" after ").append(orgTabCols.get(orgTabCols.size() - 1)); } else { sb.append("alter table " ).append(this.tablename).append(" add column ").append(this.columns[i]).append(" String").append(" after ").append(this.columns[i - 1]); sb_all.append("alter table " ).append("_all").append(this.tablename).append(" add column ").append(this.columns[i]).append(" String").append(" after ").append(this.columns[i - 1]); } if (StringUtils.isNotEmpty(sb.toString())) { statement.executeUpdate(sb.toString()); } if (StringUtils.isNotEmpty(sb_all.toString())) { statement.executeUpdate(sb_all.toString()); } } } } } catch (Exception e) { e.printStackTrace(); } } // 根据IP创建连接 public void createConnection() throws Exception { // 插入语句 String insertStr = StrUtils.clickhouseInsertValue(this.tableColums, this.tablename); // 创建表 List<String> createtableStrList = StrUtils.clickhouseCreatTable(this.tableColums, this.tablename, Constant.CKCLUSTERNAME, this.tableColums[3], this.types); // 创建数据库 String create_database_str = "create database if not exists " + this.tablename.split("\.")[0]; for (String ip : this.ips) { String url = "jdbc:clickhouse://" + ip + ":8123"; Connection connection = DriverManager.getConnection(url, this.username, this.password); Statement statement = connection.createStatement(); // 执行创建数据库 statement.executeUpdate(create_database_str); // 执行创建表 statement.executeUpdate(createtableStrList.get(0)); statement.executeUpdate(createtableStrList.get(1)); // 增加表字段 tableAddColumn(statement); this.statementList.add(statement); PreparedStatement preparedStatement = connection.prepareStatement(insertStr); connection.setAutoCommit(false); this.preparedStatementList.add(preparedStatement); this.connectionList.add(connection); } } @Override public void open(Configuration parameters) throws Exception { Class.forName(this.drivername); // 创建连接 createConnection(); } @Override public void invoke(Row row, Context context) throws Exception { // 轮询写入各个local表,避免单节点数据过多 if (null != row) { Random random = new Random(); int index = random.nextInt(this.ips.length); switch (index) { case 0: if(list.size() >= this.insertCkBatchSize || isTimeToDoInsert()) { insertData(list,preparedStatementList.get(0),connectionList.get(0)); list.clear(); this.lastInsertTime = System.currentTimeMillis(); } else { list.add(row); } break; case 1: if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) { insertData(list,preparedStatementList.get(1),connectionList.get(1)); list.clear(); this.lastInsertTime = System.currentTimeMillis(); } else { list.add(row); } break; case 2: if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) { insertData(list,preparedStatementList.get(2),connectionList.get(2)); list.clear(); this.lastInsertTime = System.currentTimeMillis(); } else { list.add(row); } break; case 3: if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) { insertData(list,preparedStatementList.get(3),connectionList.get(3)); list.clear(); this.lastInsertTime = System.currentTimeMillis(); } else { list.add(row); } break; case 4: if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) { insertData(list,preparedStatementList.get(4),connectionList.get(4)); list.clear(); this.lastInsertTime = System.currentTimeMillis(); } else { list.add(row); } break; case 5: if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) { insertData(list,preparedStatementList.get(5),connectionList.get(5)); list.clear(); this.lastInsertTime = System.currentTimeMillis(); } else { list.add(row); } break; case 6: if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) { insertData(list,preparedStatementList.get(6),connectionList.get(6)); list.clear(); this.lastInsertTime = System.currentTimeMillis(); } else { list.add(row); } break; case 7: if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) { insertData(list,preparedStatementList.get(7),connectionList.get(7)); list.clear(); this.lastInsertTime = System.currentTimeMillis(); } else { list.add(row); } break; case 8: if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) { insertData(list,preparedStatementList.get(8),connectionList.get(8)); list.clear(); this.lastInsertTime = System.currentTimeMillis(); } else { list.add(row); } break; case 9: if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) { insertData(list,preparedStatementList.get(9),connectionList.get(9)); list.clear(); this.lastInsertTime = System.currentTimeMillis(); } else { list.add(row); } break; case 10: if(list.size() >= this.insertCkBatchSize || this.isTimeToDoInsert()) { insertData(list,preparedStatementList.get(10),connectionList.get(10)); list.clear(); this.lastInsertTime = System.currentTimeMillis(); } else { list.add(row); } break; } } } @Override public void close() throws Exception { for (Statement statement : this.statementList) { if (null != statement) { statement.close(); } } for (PreparedStatement preparedStatement : this.preparedStatementList) { if (null != preparedStatement) { preparedStatement.close(); } } for (Connection connection : this.connectionList) { if (null != connection) { connection.close(); } } } /** * 根据时间判断是否插入数据 * * @return */ private boolean isTimeToDoInsert() { long currTime = System.currentTimeMillis(); return currTime - this.lastInsertTime >= this.insertCkTimenterval; } }
通过自定义Sink方式写入Clickhouse,底层还是使用JDBC的方式,要注意插入不要过于频繁,否则会报错误(数据插入的频率大于数据合并)批次插入,批次最好设置大点,轮询写入每个节点方式有待优化。
最后
以上就是单薄发箍最近收集整理的关于实时日志数据写入Clickhouse的全部内容,更多相关实时日志数据写入Clickhouse内容请搜索靠谱客的其他文章。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复