Overview
Hadoop Ecosystem: Creating HFiles for HBase
Author: 尹正杰 (yinzhengjie)
Copyright notice: original work, please do not repost without permission! Violators will be held legally responsible.
Without further ado, let's go straight to the code; everything I want to say is in the code comments.
1. Preparing the environment
# The table lives in the "yinzhengjie" namespace, so create the namespace first if it does not exist yet.
create_namespace 'yinzhengjie'
list
create 'yinzhengjie:WordCount3','f1','f2'
list
desc 'yinzhengjie:WordCount3'
scan 'yinzhengjie:WordCount3'
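The job below reads its input from the local file D:\BigData\yinzhengjieData\word.txt. The post does not show the file's contents, but any space-separated text will do; a minimal hypothetical sample:

hello world hello hbase
hadoop hbase hfile
hello hadoop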
2. Writing the HFile creation code
1>.Writing the Mapper code
/*
@author :yinzhengjie
Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E7%94%9F%E6%80%81%E5%9C%88/
EMAIL:y1053419035@qq.com
*/
package cn.org.yinzhengjie.hbase.hfile;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

public class HFileOutputMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Read one line of input text.
        String line = value.toString();
        // Split the line into words on single spaces.
        String[] arr = line.split(" ");
        // Emit a (word, 1) pair for every word; the reducer sums the counts.
        for (String word : arr) {
            context.write(new Text(word), new IntWritable(1));
        }
    }
}
2>.Writing the Reducer code
/*
@author :yinzhengjie
Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E7%94%9F%E6%80%81%E5%9C%88/
EMAIL:y1053419035@qq.com
*/
package cn.org.yinzhengjie.hbase.hfile;

import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class HFileOutputReducer extends Reducer<Text, IntWritable, ImmutableBytesWritable, Cell> {
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        // Sum up all counts for this word.
        int sum = 0;
        for (IntWritable value : values) {
            sum += value.get();
        }
        // Skip empty words produced by consecutive spaces in the input.
        if (key.toString().length() > 0) {
            ImmutableBytesWritable outKey = new ImmutableBytesWritable(Bytes.toBytes(key.toString()));
            // Create a data cell: row key = word, column = f1:count, value = count.
            // Data cells written into an HFile should be of type Put;
            // KeyValue.Type.Minimum is an internal marker type, not meant for real data.
            Cell cell = CellUtil.createCell(Bytes.toBytes(key.toString()),
                    Bytes.toBytes("f1"), Bytes.toBytes("count"), System.currentTimeMillis(),
                    KeyValue.Type.Put, Bytes.toBytes(sum + ""), null);
            context.write(outKey, cell);
        }
    }
}
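One thing worth knowing here: HFileOutputFormat2 requires the cells it writes to arrive in row-key order, otherwise it fails with a "key not lexically larger than previous" error. This job satisfies that requirement because the MapReduce shuffle sorts the Text keys byte-lexicographically, which for plain ASCII words matches HBase's row-key ordering, and each reducer writes only one cell per row.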
3>.Writing the driver code
/*
@author :yinzhengjie
Blog:http://www.cnblogs.com/yinzhengjie/tag/Hadoop%E7%94%9F%E6%80%81%E5%9C%88/
EMAIL:y1053419035@qq.com
*/
package cn.org.yinzhengjie.hbase.hfile;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class App {

    public static void main(String[] args) throws Exception {

        System.setProperty("HADOOP_USER_NAME", "yinzhengjie");
        Configuration conf = HBaseConfiguration.create();
        // Run against the local file system for this test.
        conf.set("fs.defaultFS", "file:///");
        Connection conn = ConnectionFactory.createConnection(conf);
        Job job = Job.getInstance(conf);
        job.setJobName("HFile WordCount");
        job.setJarByClass(App.class);
        job.setMapperClass(HFileOutputMapper.class);
        job.setReducerClass(HFileOutputReducer.class);
        // Set the output format.
        job.setOutputFormatClass(HFileOutputFormat2.class);
        // Set the input and output paths. Note that backslashes in Java string
        // literals must be escaped as "\\".
        FileInputFormat.addInputPath(job, new Path("file:///D:\\BigData\\yinzhengjieData\\word.txt"));
        FileOutputFormat.setOutputPath(job, new Path("file:///D:\\BigData\\yinzhengjieData\\hfile"));
        // Set the job's output key/value types.
        job.setOutputKeyClass(ImmutableBytesWritable.class);
        job.setOutputValueClass(Cell.class);
        // Set the map-side output key/value types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        /**
         * Associate the job with the table "yinzhengjie:WordCount3". The table must already
         * exist in HBase, because it serves as the template for generating the HFiles:
         * its descriptor supplies the column-family settings (compression, bloom filters,
         * block encoding), and its region boundaries drive the TotalOrderPartitioner, so
         * we fetch the real descriptor from the cluster rather than building an empty one.
         */
        TableName tableName = TableName.valueOf("yinzhengjie:WordCount3");
        Table table = conn.getTable(tableName);
        HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(),
                conn.getRegionLocator(tableName));
        job.waitForCompletion(true);
        conn.close();
    }
}
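The job above only generates HFiles; it does not load them into the table. The post stops at generation, but for completeness, here is a minimal sketch of the follow-up bulk-load step using the HBase 1.x-era LoadIncrementalHFiles API (the same API behind the completebulkload tool). The class name BulkLoadApp is illustrative; the table name and paths follow the example above.

/*
 * A minimal sketch (not part of the original post) of loading the generated
 * HFiles into the target table with LoadIncrementalHFiles.
 */
package cn.org.yinzhengjie.hbase.hfile;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;

public class BulkLoadApp {

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        try (Connection conn = ConnectionFactory.createConnection(conf);
             Admin admin = conn.getAdmin()) {
            TableName tableName = TableName.valueOf("yinzhengjie:WordCount3");
            Table table = conn.getTable(tableName);
            // Point at the directory that App wrote the HFiles into. Against a real
            // cluster this directory must live on HDFS where the RegionServers can
            // reach it; the local path here matches the local-FS test setup above.
            Path hfileDir = new Path("file:///D:\\BigData\\yinzhengjieData\\hfile");
            // Move the HFiles into the regions of the target table.
            LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
            loader.doBulkLoad(hfileDir, admin, table, conn.getRegionLocator(tableName));
        }
    }
}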
4>.Checking the test results
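Assuming the paths above, a successful run leaves a _SUCCESS marker and an f1 subdirectory under D:\BigData\yinzhengjieData\hfile, with one generated HFile per region of the target table. After running the bulk-load step sketched above, the data can be verified in the HBase shell:

scan 'yinzhengjie:WordCount3'

Each word from the input file should appear as a row key with a single column f1:count holding its count.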