概述
需求
利用Hadoop MapReduce对文件进行处理
源文件:(局部)
{"date":" 2020.9.3","temp":" 17-28","city":"北京","weather":" 多云","wind":" 无持续风向 3-4级"}
处理后的结果:(局部)
北京, 2020.9.3, 多云, 17-28, 无持续风向 3-4级
提示:
- 把 JSON 格式转换为简单字符串格式,需要在 pom.xml 中添加以下依赖:
<dependency>
    <groupId>commons-beanutils</groupId>
    <artifactId>commons-beanutils</artifactId>
    <version>1.9.3</version>
</dependency>
<dependency>
    <groupId>commons-collections</groupId>
    <artifactId>commons-collections</artifactId>
    <version>3.2.1</version>
</dependency>
<dependency>
    <groupId>commons-lang</groupId>
    <artifactId>commons-lang</artifactId>
    <version>2.6</version>
</dependency>
<dependency>
    <groupId>commons-logging</groupId>
    <artifactId>commons-logging</artifactId>
    <version>1.1.1</version>
</dependency>
<dependency>
    <groupId>net.sf.ezmorph</groupId>
    <artifactId>ezmorph</artifactId>
    <version>1.0.6</version>
</dependency>
<dependency>
    <groupId>net.sf.json-lib</groupId>
    <artifactId>json-lib</artifactId>
    <version>2.4</version>
    <classifier>jdk15</classifier>
</dependency>
- 注意导入的包和调用的方法
- 注意处理逻辑
代码
JavaBean代码
/**
 * JavaBean holding one weather record.
 *
 * <p>Example input (JSON):
 * {@code {"date":" 2020.9.3","temp":" 17-28","city":"Beijing","weather":" cloudy","wind":" calm 3-4"}}
 *
 * <p>Example output of {@link #toString()}:
 * {@code Beijing, 2020.9.3, cloudy, 17-28, calm 3-4}
 * (the spaces after the commas come from the field values themselves, not from the joiner).
 *
 * <p>The no-arg constructor and the getter/setter pairs follow JavaBean conventions so the
 * object can be populated reflectively from a JSON object.
 */
public class WeatherData {

    private String city;
    private String date;
    private String weather;
    private String temp;
    private String wind;

    /** Creates an empty record; all fields start as {@code null}. */
    public WeatherData() {
    }

    public String getCity() {
        return this.city;
    }

    public void setCity(String city) {
        this.city = city;
    }

    public String getDate() {
        return this.date;
    }

    public void setDate(String date) {
        this.date = date;
    }

    public String getWeather() {
        return this.weather;
    }

    public void setWeather(String weather) {
        this.weather = weather;
    }

    public String getTemp() {
        return this.temp;
    }

    public void setTemp(String temp) {
        this.temp = temp;
    }

    public String getWind() {
        return this.wind;
    }

    public void setWind(String wind) {
        this.wind = wind;
    }

    /**
     * Renders the record as {@code city,date,weather,temp,wind}.
     * Unset fields are rendered as the literal text {@code null}.
     */
    @Override
    public String toString() {
        return String.join(",", city, date, weather, temp, wind);
    }
}
Mapper代码
import net.sf.json.JSONObject;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * Mapper that turns each JSON input line into a comma-separated text line.
 *
 * <p>Every non-blank line is parsed as a JSON object, bound to a {@link WeatherData}
 * bean, and the bean's {@code toString()} form is emitted as the output key with a
 * {@link NullWritable} value.
 */
public class WeatherDataMapNew extends Mapper<LongWritable, Text, Text, NullWritable> {

    /** Output key buffer, reused across calls so no new Text is allocated per record. */
    private final Text k = new Text();

    /**
     * Converts one input line and writes the result to the context.
     *
     * @param key     the input key supplied by the framework (unused)
     * @param value   one line of input, expected to hold a single JSON object
     * @param context the job context the converted line is written to
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString();
        // A blank line is not a JSON object; skip it instead of letting the parser
        // throw and fail the whole task.
        if (line.trim().isEmpty()) {
            return;
        }
        JSONObject obj = JSONObject.fromObject(line);
        // Bind the JSON object to the JavaBean by property name.
        WeatherData wd = (WeatherData) JSONObject.toBean(obj, WeatherData.class);
        k.set(wd.toString());
        context.write(k, NullWritable.get());
    }
}
Reducer代码
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
/**
 * Pass-through reducer: for each {@code reduce} call it writes the incoming key once
 * and ignores the grouped values.
 */
public class WeatherDataReduce2 extends Reducer<Text, NullWritable, Text, NullWritable> {

    /**
     * Emits {@code key} paired with the {@link NullWritable} singleton.
     *
     * @param key     the text line to emit
     * @param values  the grouped values for this key (not read)
     * @param context the job context the key is written to
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if the write is interrupted
     */
    @Override
    protected void reduce(Text key, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        final NullWritable nothing = NullWritable.get();
        context.write(key, nothing);
    }
}
Driver代码
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
public class WeatherDataDriver2 {
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
conf.set("hadoop.temp.dir","D://file//hdfs_temp");
//创建job对象
Job job = Job.getInstance(conf);
//设置存储jar位置
job.setJarByClass(WeatherDataDriver2.class);
//关联Mapper和Reducer
//
job.setMapperClass(WeatherDataMap2.class);
job.setMapperClass(WeatherDataMapNew.class);
job.setReducerClass(WeatherDataReduce2.class);
//设置Mapper阶段输出的key和value
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(NullWritable.class);
//设置最终阶段输出的key和value
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
//设置输入输出路径
Path p = setPath(job);
p.getFileSystem(conf).delete(p,true);
//提交job
Boolean result=job.waitForCompletion(true);
System.out.println(result);
System.exit(result?0:1);
}
private static Path setPath(Job job) throws IOException {
Path input = new Path("D:\file\weatherdata2.txt");
Path output = new Path("D:\File\output");
FileInputFormat.addInputPath(job,input);
FileOutputFormat.setOutputPath(job,output);
return output;
}
}
最后
以上就是谨慎大神为你收集整理的“将 JSON 格式转换为简单字符串格式”MapReduce 数据清洗实操(需求与代码)的全部内容,希望本文能够帮你解决在实现该需求时遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复