hadoop获取成绩列表中每科成绩的最高分，获取成绩列表中每科成绩的平均分

168 阅读 0 评论 111 点赞

我是靠谱客的博主光亮草莓，这篇文章主要介绍hadoop获取成绩列表中每科成绩的最高分，获取成绩列表中每科成绩的平均分，现在分享给大家，希望可以做个参考。

1、实验内容
编程练习：
（1）成绩表subject_score.txt中每行数据包括科目和分数两个字段，要求获取成绩列表中每科成绩的最高分。
（2）成绩表subject_score.txt中每行数据包括科目和分数两个字段，要求获取成绩列表中每科成绩的平均分。
2、实验目的
掌握Hadoop系统的MapReduce程序设计方法；
3、代码及测试结果

package score;
import java.io.IOException;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
/**
 * Driver for the per-subject score statistics MapReduce job.
 *
 * <p>Wires {@link SMapper} and {@link SReducer} into a single job that reads the
 * score file from the first remaining command-line argument and writes its
 * results to the directory named by the second.
 */
public class S {
    /**
     * Configures and runs the job, blocking until it finishes.
     *
     * @param args command-line arguments; after the generic Hadoop options are
     *             stripped, the first remaining argument is the input path and
     *             the second is the output path
     * @throws Exception if the job cannot be configured or submitted
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Only the arguments left over after generic option parsing are treated as paths.
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            // Fail with a readable message instead of an ArrayIndexOutOfBoundsException.
            System.err.println("Usage: S <input path> <output path>");
            System.exit(2);
        }

        // NOTE(review): the job name is a leftover from the word-count template; it is
        // kept unchanged here because external tooling may look the job up by name.
        Job job = Job.getInstance(conf, "word count");
        // Important: identifies the jar that contains this job's classes.
        job.setJarByClass(S.class);

        // Mapper and reducer implementations.
        job.setMapperClass(SMapper.class);
        job.setReducerClass(SReducer.class);

        // Key/value types emitted by the map phase.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Key/value types of the final output.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Input data location and output directory.
        FileInputFormat.setInputPaths(job, new Path(otherArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

        // Submit the job and wait; report failure through the process exit status
        // so that calling scripts can detect it.
        if (!job.waitForCompletion(true)) {
            System.exit(1);
        }
    }
}
package score;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Mapper.Context;
/**
 * Mapper that turns one line of the score file into a (subject, score) pair.
 *
 * <p>The map method receives the line's starting offset as the key and the line's
 * text as the value. The first whitespace-separated field of the line is used as
 * the subject and the second as the integer score; any further fields are ignored.
 */
public class SMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    /**
     * Separator between fields: any run of whitespace, so tab-separated or
     * multi-space input is accepted. Fully qualified so that no extra import is needed.
     */
    private static final java.util.regex.Pattern FIELD_SEPARATOR =
            java.util.regex.Pattern.compile("\\s+");

    /** Counter group under which skipped input records are reported. */
    private static final String COUNTER_GROUP = "SMapper";

    // Output objects are reused across map() calls to avoid one allocation per record.
    private final Text course = new Text();
    private final IntWritable score = new IntWritable();

    /**
     * Emits {@code <subject, score>} for one input line.
     *
     * <p>Blank lines, lines with fewer than two fields and lines whose score is not
     * a valid integer are skipped and counted instead of failing the whole task.
     *
     * @param key     starting offset of the line (unused)
     * @param value   text of the line
     * @param context sink for the emitted pair and for the skip counters
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        String line = value.toString().trim();
        if (line.isEmpty()) {
            // Typically a trailing newline at the end of the file; nothing to emit.
            return;
        }

        String[] fields = FIELD_SEPARATOR.split(line);
        if (fields.length < 2) {
            context.getCounter(COUNTER_GROUP, "MISSING_SCORE").increment(1);
            return;
        }

        final int parsedScore;
        try {
            parsedScore = Integer.parseInt(fields[1]);
        } catch (NumberFormatException e) {
            // One malformed record should not abort the job; surface it via a counter.
            context.getCounter(COUNTER_GROUP, "INVALID_SCORE").increment(1);
            return;
        }

        course.set(fields[0]);
        score.set(parsedScore);
        context.write(course, score);
    }
}
package score;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Reducer.Context;
public class SReducer extends Reducer<Text, IntWritable, Text, IntWritable>{
/*
* reduce方法提供给reduce task进程来调用
*
* reduce task会将shuffle阶段分发过来的大量kv数据对进行聚合，聚合的机制是相同key的kv对聚合为一组
* 然后reduce task对每一组聚合kv调用一次我们自定义的reduce方法
* 比如：<hello,1><hello,1><hello,1><tom,1><tom,1><tom,1>
*
hello组会调用一次reduce方法进行处理，tom组也会调用一次reduce方法进行处理
*
调用时传递的参数：
*
key：一组kv中的key
*
values：一组kv中所有value的迭代器
*/
private IntWritable result=new IntWritable();
protected void reduce(Text key, Iterable<IntWritable> values,Context context) throws IOException, InterruptedException {
//定义一个计数器
int max = 0;
int sum=0;
int count=0;
//通过value这个迭代器，遍历这一组kv中所有的value，进行累加
for(IntWritable score:values){
if(max<score.get()) {
max=score.get();
}
sum+=score.get();
count++;
}
int avg=(int)sum/count;
result.set(max);
//输出这个单词的统计结果
context.write(key,new IntWritable(avg) );
context.write(key, result);
}
}