我是靠谱客的博主 沉静雪糕,最近开发中收集的这篇文章主要介绍hadoop学习笔记4 在学习笔记3基础上增加案例测试,觉得挺不错的,现在分享给大家,希望可以做个参考。

概述

java代码如下   求出学生的成绩平均值 

package mouapTest;


import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.StringTokenizer;


import mouapTest.WorldCount2_5_2.IntSumReducer;
import mouapTest.WorldCount2_5_2.TokenizerMapper;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;


public class WorldCount_studentAvgSort {


/**

* @author root
*
*/
public static class StuMap extends Mapper<Object, Text, Text, IntWritable>{

@Override
protected void map(Object key, Text value,Mapper<Object, Text, Text, IntWritable>.Context context)
throws IOException, InterruptedException {

String line = value.toString(); 
System.out.println("line = "+line); //将纯文本转为string

StringTokenizer token  = new StringTokenizer(line,"n"); //将输入的数据按行进行分割

while(token.hasMoreTokens()){ //循环一行记录

StringTokenizer tokenline = new StringTokenizer(token.nextToken());
String strName = tokenline.nextToken();
String strScore = tokenline.nextToken();
Text name = new Text(strName);
int scoreInt = Integer.parseInt(strScore); //分数

context.write(name, new IntWritable(scoreInt)); //key为姓名  value为分数
}
}

}

/**
 * Reducer that collapses all scores received for one key into their integer average.
 *
 * @author root
 */
public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {

    /**
     * Writes {@code key} together with the truncated integer mean of {@code values}.
     *
     * @param key     the key whose values are being averaged
     * @param values  the integer values grouped under {@code key}
     * @param context sink that receives the (key, average) pair
     * @throws IOException          propagated from {@code context.write}
     * @throws InterruptedException propagated from {@code context.write}
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values,
            Reducer<Text, IntWritable, Text, IntWritable>.Context context)
            throws IOException, InterruptedException {

        int total = 0;
        int seen = 0;
        for (IntWritable score : values) {
            total += score.get();
            seen++;
        }

        // Integer division: any fractional part of the mean is discarded.
        context.write(key, new IntWritable(total / seen));
    }

}


public static void main(String[] args) throws IOException {

Configuration conf = new Configuration();
 
GenericOptionsParser optionParser = new GenericOptionsParser(conf, args);
 
String[] remainingArgs = optionParser.getRemainingArgs();
 
if (!(remainingArgs.length != 2 || remainingArgs.length != 4)) {
   System.err.println("Usage: wordcount <in> <out> [-skip skipPatternFile]");
   System.exit(2);
}
 
Job job = Job.getInstance(conf, "word student avg");
 
job.setJarByClass(WorldCount_studentAvgSort.class);
 
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
 
job.setMapperClass(StuMap.class);
job.setReducerClass(Reduce.class);


/**/
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
 
job.setCombinerClass(Reduce.class);
 
 
// List<String> otherArgs = new ArrayList<String>();
// for (int i=0; i < remainingArgs.length; ++i) {
//   if ("-skip".equals(remainingArgs[i])) {
//     job.addCacheFile(new Path(remainingArgs[++i]).toUri());
//     job.getConfiguration().setBoolean("wordcount.skip.patterns", true);
//   } else {
//     otherArgs.add(remainingArgs[i]);
//   }
// }
// FileInputFormat.addInputPath(job, new Path(otherArgs.get(0)));
// FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(1)));
 
 
//hdfs://127.0.0.1:9000/user/root/instudent hdfs://127.0.0.1:9000/user/root/outstudent
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
 
// System.out.println("otherArgs.get(0) ="+otherArgs.get(0)+"   otherArgs.get(1)"+otherArgs.get(1));
 
try {
System.exit(job.waitForCompletion(true) ? 0 : 1);
} catch (ClassNotFoundException e) {
e.printStackTrace();
} catch (InterruptedException e) {
e.printStackTrace();

}
}


用eclipse打包成jar  /home/mouap/mouapTest/WorldCount_studentAvgSort.jar  这里要注意增加main方法


然后进入 /home/mouap/hadoop 路径下 建立 instudent文件夹

在 /home/mouap/hadoop/instudent 路径下 建立2个文件

student1.txt student2.txt

内容:

student1.txt >>>

zhangsan 10
zhangsan 20
zhangsan 30
zhangsan 40

student2.txt >>>

lisi 10
lisi 20
lisi 60
lisi 80


然后将 instudent 文件夹上传到 hdfs 的 /user/root/instudent 目录下:bin/hadoop dfs -put instudent /user/root/instudent

在 /home/mouap/hadoop/ 目录下 执行 bin/hadoop jar /home/mouap/mouapTest/WorldCount_studentAvgSort.jar  /user/root/instudent /user/root/outstudent

这里要注意 执行eclipse打包的jar 和 执行案例worldCount命令 有点小不同 具体参考 前面学习笔记

输出结果:

lisi 42
zhangsan 25


案例2 :java代码

package mouapTest;


import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;


import mouapTest.WorldCount_studentAvgSort.Reduce;
import mouapTest.WorldCount_studentAvgSort.StuMap;


import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;


public class WorldCount_helthBody {






/**

* @author root
*
*/
public static class StuMap extends Mapper<Object, Text, Text, IntWritable>{

@Override
protected void map(Object key, Text value,Mapper<Object, Text, Text, IntWritable>.Context context)
throws IOException, InterruptedException {

String line = value.toString(); 
System.out.println("line = "+line); //将纯文本转为string

StringTokenizer token  = new StringTokenizer(line,"#"); //将输入的数据按行进行分割

while(token.hasMoreTokens()){ 

StringTokenizer tokennameline = new StringTokenizer(token.nextToken());

while(tokennameline.hasMoreTokens()){

StringTokenizer tokenline = new StringTokenizer(tokennameline.nextToken());

String strName = tokenline.nextToken();

if(tokenline.hasMoreTokens()){
String strScore = tokenline.nextToken();
Text name = new Text(strName);
int scoreInt = Integer.parseInt(strScore); //分数
context.write(name, new IntWritable(scoreInt)); //key为姓名  value为分数
}

}
}
}

}

/**
 * Reducer that replaces the values grouped under one key by their integer average.
 *
 * @author root
 */
public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {

    /**
     * Sums and counts {@code values}, then writes {@code key} with sum / count.
     *
     * @param key     the key whose values are being averaged
     * @param values  the integer values grouped under {@code key}
     * @param context sink that receives the (key, average) pair
     * @throws IOException          propagated from {@code context.write}
     * @throws InterruptedException propagated from {@code context.write}
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values,
            Reducer<Text, IntWritable, Text, IntWritable>.Context context)
            throws IOException, InterruptedException {

        int runningSum = 0;
        int howMany = 0;
        for (IntWritable item : values) {
            runningSum += item.get();
            howMany++;
        }

        // Integer division truncates the mean toward zero.
        IntWritable average = new IntWritable(runningSum / howMany);
        context.write(key, average);
    }

}


public static void main(String[] args) throws IOException {

Configuration conf = new Configuration();
 
GenericOptionsParser optionParser = new GenericOptionsParser(conf, args);
 
String[] remainingArgs = optionParser.getRemainingArgs();
 
if (!(remainingArgs.length != 2 || remainingArgs.length != 4)) {
   System.err.println("Usage: wordcount <in> <out> [-skip skipPatternFile]");
   System.exit(2);
}
 
Job job = Job.getInstance(conf, "word student avg");
 
job.setJarByClass(WorldCount_studentAvgSort.class);
 
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
 
job.setMapperClass(StuMap.class);
job.setReducerClass(Reduce.class);


/**/
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
 
job.setCombinerClass(Reduce.class);
 
 
// List<String> otherArgs = new ArrayList<String>();
// for (int i=0; i < remainingArgs.length; ++i) {
//   if ("-skip".equals(remainingArgs[i])) {
//     job.addCacheFile(new Path(remainingArgs[++i]).toUri());
//     job.getConfiguration().setBoolean("wordcount.skip.patterns", true);
//   } else {
//     otherArgs.add(remainingArgs[i]);
//   }
// }
// FileInputFormat.addInputPath(job, new Path(otherArgs.get(0)));
// FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(1)));
 
 
//hdfs://127.0.0.1:9000/user/root/inheth hdfs://127.0.0.1:9000/user/root/outheth
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
 
// System.out.println("otherArgs.get(0) ="+otherArgs.get(0)+"   otherArgs.get(1)"+otherArgs.get(1));
 
try {
System.exit(job.waitForCompletion(true) ? 0 : 1);
} catch (ClassNotFoundException e) {
e.printStackTrace();
} catch (InterruptedException e) {
e.printStackTrace();

}




}


zhangsan 10 #a  stu1.txt

zhangsan 20 #a  stu2.txt

zhangsan 30 #a  stu3.txt



===========================================常用命令 个人笔记========================================

cd /usr/local/bin
sudo -s /usr/app/eclipse/eclipse/eclipse  用终端启动eclipse


su root 


mpstat -P ALL 5    //查看CPU使用情况;需要注意的是 P 和 ALL 一定要大写


cp -rf /home/mouap/hadoop/tmp /home/mouap/hadoop/tmpmoutest


chown -R 用户名:组名 文件名或目录名   --修改所有文件夹下面的内容 


mkdir xxx


file /bin/ls libhdfs.so  查看文件是32位还是64位


chown root * 修改文件所有者(ls -l 中显示的第一个名字)


chgrp root * 修改文件所属组(ls -l 中显示的第二个名字)


mv  wenjian  ziliao --改文件名字


chmod 777


find / -name network --按文件名查找文件(此处查找 network;查找 examples.jar 时把名字换掉即可)


tar zvxf eclipse-jee-luna-SR2-linux-gtk-x86_64.tar.gz --解压


update-alternatives --install /usr/bin/java java /usr/lib/jdk/jdk1.8.0_45/bin/java 300


update-alternatives --install /usr/bin/javac javac /usr/lib/jdk/jdk1.8.0_45/bin/javac 300


 tar zvxf eclipse-jee-luna-SR2-linux-gtk-x86_64.tar.gz


cd /etc  进入 /etc 目录,修改其中的 hosts 文件


/usr/lib/jdk/jdk1.8.0_45/bin/jps 查看进程
格式化文件   在 mouap/hadoop/目录下
bin/hdfs namenode -format
bin/hadoop dfs -mkdir /user/root --建立好目录 然后在执行下面语句 将文件夹放入文件
bin/hadoop dfs -put input /user/root/in   --放文件
bin/hadoop dfsadmin -safemode leave  如果报错提示处于安全模式(safe mode),执行此命令退出安全模式
bin/hadoop dfs -ls /in  查看 
bin/hadoop jar /usr/app/hadoop-eclipse-plugin/build/contrib/eclipse-plugin/lib/hadoop-mapreduce-examples-2.5.2.jar  wordcount in out  --运行案例程序
bin/hadoop jar /home/mouap/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.5.2.jar  wordcount in out  --运行案例程序
bin/hadoop jar /home/mouap/mouapTest/WorldCount_studentAvgSort.jar  /user/root/instudent /user/root/outstudent --运行eclipse自己打的包


bin/hadoop dfs -cat out/*   --查看运行输出结果
bin/hadoop dfs -cat /user/root/out/* --查看运行输出结果
bin/hadoop fs -rm -R  /user/root/out  --删除out目录


javac -classpath ~/usr/app/hadoop-eclipse-plugin/build/contrib/eclipse-plugin/lib -d WorldCount2_5_2.java






启动服务
/home/mouap/hadoop/sbin     ./start-dfs.sh
[root@nameNode sbin]# ./start-yarn.sh  
[root@nameNode sbin]# ./start-dfs.sh 


停止服务
/home/mouap/hadoop/sbin# ./stop-all.sh




下载hadoop-eclipse插件 https://github.com/winghc/hadoop2x-eclipse-plugin


下载ant http://ant.apache.org/bindownload.cgi
cd /usr/bin     ln -s -f /usr/app/ant/bin/ant




ANT_HOME=/usr/app/ant
PATH=$JAVA_HOME/bin:$ANT_HOME/bin:$PATH




ant jar -Dversion=2.5.2 -Declipse.home=/usr/app/eclipse/eclipse 
-Dhadoop.home=/home/hadoop/hadoop-2.5.2


$ANT_HOME/bin/ant jar -Dversion=2.5.2 -Declipse.home=/usr/app/eclipse/eclipse -Dhadoop.home=/home/mouap/hadoop


ant jar -Dversion=2.5.2 -Declipse.home=/usr/app/eclipse/eclipse -Dhadoop.home=/home/mouap/hadoop


Building jar: /usr/app/hadoop-eclipse-plugin/build/contrib/eclipse-plugin/hadoop-eclipse-plugin-2.5.2.jar


/home/mouap/mouapTest/WorldCount_studentAvgSort.jar





最后

以上就是沉静雪糕为你收集整理的hadoop学习笔记4 在学习笔记3基础上增加案例测试的全部内容,希望文章能够帮你解决hadoop学习笔记4 在学习笔记3基础上增加案例测试所遇到的程序开发问题。

如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。

本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
点赞(65)

评论列表共有 0 条评论

立即
投稿
返回
顶部