Hadoop learning notes 4: adding example tests on top of learning notes 3

Overview

The Java code below computes each student's average score.

package mouapTest;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WorldCount_studentAvgSort {

    public static class StuMap extends Mapper<Object, Text, Text, IntWritable> {

        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {

            String line = value.toString(); // convert the raw Text to a String
            System.out.println("line = " + line);

            // split the input into lines; with TextInputFormat each map() call
            // already receives a single line, so this loop normally runs once
            StringTokenizer token = new StringTokenizer(line, "\n");

            while (token.hasMoreTokens()) {
                // each record is "name score", separated by whitespace
                StringTokenizer tokenline = new StringTokenizer(token.nextToken());
                String strName = tokenline.nextToken();
                String strScore = tokenline.nextToken();
                Text name = new Text(strName);
                int scoreInt = Integer.parseInt(strScore); // the score
                context.write(name, new IntWritable(scoreInt)); // key = name, value = score
            }
        }
    }

    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {

            int sum = 0;
            int count = 0;

            Iterator<IntWritable> iterator = values.iterator();
            while (iterator.hasNext()) {
                sum += iterator.next().get();
                count++;
            }
            int avg = sum / count; // integer division truncates, e.g. 42.5 -> 42
            context.write(key, new IntWritable(avg));
        }
    }

    public static void main(String[] args) throws IOException {

        Configuration conf = new Configuration();
        GenericOptionsParser optionParser = new GenericOptionsParser(conf, args);
        String[] remainingArgs = optionParser.getRemainingArgs();

        if (remainingArgs.length != 2) {
            System.err.println("Usage: WorldCount_studentAvgSort <in> <out>");
            System.exit(2);
        }

        Job job = Job.getInstance(conf, "word student avg");
        job.setJarByClass(WorldCount_studentAvgSort.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setMapperClass(StuMap.class);
        job.setReducerClass(Reduce.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // no combiner here: an averaging reducer is not a valid combiner, because
        // an average of partial averages is wrong in general (see the sketch below)

        // e.g. hdfs://127.0.0.1:9000/user/root/instudent hdfs://127.0.0.1:9000/user/root/outstudent
        FileInputFormat.addInputPath(job, new Path(remainingArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(remainingArgs[1]));

        try {
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
}
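
A note on why the listing above does not call job.setCombinerClass(Reduce.class): a combiner may run zero, one, or several times over partial map output, so whatever it emits must be safe to merge again. An average is not mergeable; averaging partial averages gives the wrong answer in general (with this data the numbers happen to agree only because each student's scores sit in a single file). If a combiner is wanted, keep (sum, count) pairs mergeable until the final division. The sketch below is a minimal illustration, not part of these notes' code: the class and names (AvgWithCombiner, SumCountCombiner, AvgReducer) are hypothetical, the pair is encoded as "sum,count" in a Text, and the mapper would have to emit new Text(score + ",1") instead of an IntWritable.

package mouapTest;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class AvgWithCombiner {

    // combiner: merges partial (sum, count) pairs; its output is still mergeable
    public static class SumCountCombiner extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            long sum = 0, count = 0;
            for (Text v : values) { // each value is "partialSum,partialCount"
                String[] parts = v.toString().split(",");
                sum += Long.parseLong(parts[0]);
                count += Long.parseLong(parts[1]);
            }
            context.write(key, new Text(sum + "," + count));
        }
    }

    // reducer: merges the pairs one last time, then divides exactly once
    public static class AvgReducer extends Reducer<Text, Text, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            long sum = 0, count = 0;
            for (Text v : values) {
                String[] parts = v.toString().split(",");
                sum += Long.parseLong(parts[0]);
                count += Long.parseLong(parts[1]);
            }
            context.write(key, new IntWritable((int) (sum / count)));
        }
    }
}

The job would then use job.setCombinerClass(SumCountCombiner.class) and job.setReducerClass(AvgReducer.class), with Text as the map output value class.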


Package it with Eclipse into a jar: /home/mouap/mouapTest/WorldCount_studentAvgSort.jar. Note that you must specify the main class when exporting.


Then, under /home/mouap/hadoop, create an instudent folder.

Under /home/mouap/hadoop/instudent, create two files:

student1.txt and student2.txt

Contents:

student1.txt >>>

zhangsan 10
zhangsan 20
zhangsan 30
zhangsan 40

student2.txt >>>

lisi 10
lisi 20
lisi 60
lisi 80


Then put the folder into HDFS: bin/hadoop dfs -put instudent /user/root/instudent

From the /home/mouap/hadoop/ directory, run: bin/hadoop jar /home/mouap/mouapTest/WorldCount_studentAvgSort.jar /user/root/instudent /user/root/outstudent

Note that running a jar exported from Eclipse differs slightly from running the bundled wordcount example: the examples jar takes the program name (wordcount) as an extra argument, while our jar's manifest already names the main class. See the earlier learning notes for details.

Output:

lisi 42
zhangsan 25
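
Why lisi shows 42 rather than 42.5: the reducer divides two ints, and integer division truncates. A quick check in plain Java (a hypothetical scratch class, not part of the job):

public class AvgCheck {
    public static void main(String[] args) {
        int sum = 10 + 20 + 60 + 80;   // lisi's scores: 170
        System.out.println(sum / 4);   // prints 42: integer division drops the .5
        System.out.println(sum / 4.0); // prints 42.5: use doubles to keep the fraction
    }
}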


Example 2: Java code. The job is the same average computation, but each input record carries a trailing "#tag" that the mapper strips before parsing.

package mouapTest;

import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class WorldCount_helthBody {

    public static class StuMap extends Mapper<Object, Text, Text, IntWritable> {

        @Override
        protected void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {

            String line = value.toString(); // one input line per call with TextInputFormat
            System.out.println("line = " + line);

            // a record looks like "name score #tag"; everything from '#' on is ignored
            int hash = line.indexOf('#');
            String record = (hash >= 0) ? line.substring(0, hash) : line;

            StringTokenizer tokenline = new StringTokenizer(record);
            if (tokenline.countTokens() >= 2) { // skip blank or malformed lines
                String strName = tokenline.nextToken();
                int scoreInt = Integer.parseInt(tokenline.nextToken()); // the score
                context.write(new Text(strName), new IntWritable(scoreInt)); // key = name, value = score
            }
        }
    }

    public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {

            int sum = 0;
            int count = 0;

            Iterator<IntWritable> iterator = values.iterator();
            while (iterator.hasNext()) {
                sum += iterator.next().get();
                count++;
            }
            int avg = sum / count; // integer division, as in example 1
            context.write(key, new IntWritable(avg));
        }
    }

    public static void main(String[] args) throws IOException {

        Configuration conf = new Configuration();
        GenericOptionsParser optionParser = new GenericOptionsParser(conf, args);
        String[] remainingArgs = optionParser.getRemainingArgs();

        if (remainingArgs.length != 2) {
            System.err.println("Usage: WorldCount_helthBody <in> <out>");
            System.exit(2);
        }

        Job job = Job.getInstance(conf, "word student avg");
        job.setJarByClass(WorldCount_helthBody.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setMapperClass(StuMap.class);
        job.setReducerClass(Reduce.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // no combiner: averaging is not associative, as explained in example 1

        // e.g. hdfs://127.0.0.1:9000/user/root/inheth hdfs://127.0.0.1:9000/user/root/outheth
        FileInputFormat.addInputPath(job, new Path(remainingArgs[0]));
        FileOutputFormat.setOutputPath(job, new Path(remainingArgs[1]));

        try {
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
}


Sample input for example 2 (one record per file; the trailing "#a" is the tag the mapper strips):

stu1.txt: zhangsan 10 #a
stu2.txt: zhangsan 20 #a
stu3.txt: zhangsan 30 #a
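
You can sanity-check the '#'-stripping parse locally before submitting the job. The little class below is a hypothetical helper (not part of the notes' code) that runs the same parsing logic as StuMap on exactly the three sample records above; assuming each of the three files holds one such record, the job's own output would be zhangsan 20, since (10+20+30)/3 = 20.

package mouapTest;

import java.util.StringTokenizer;

// hypothetical helper: replays StuMap's parsing on the sample records, no Hadoop needed
public class ParseCheck {
    public static void main(String[] args) {
        String[] records = { "zhangsan 10 #a", "zhangsan 20 #a", "zhangsan 30 #a" };
        for (String line : records) {
            int hash = line.indexOf('#');
            String record = (hash >= 0) ? line.substring(0, hash) : line; // drop the "#a" tag
            StringTokenizer t = new StringTokenizer(record);
            if (t.countTokens() >= 2) {
                System.out.println(t.nextToken() + " -> " + Integer.parseInt(t.nextToken()));
            }
        }
    }
}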



=========================================== Common commands (personal notes) ========================================

cd /usr/local/bin
sudo -s /usr/app/eclipse/eclipse/eclipse   -- launch Eclipse from a terminal


su root 


mpstat -P ALL 5    // check CPU usage; note that -P and ALL must be uppercase


cp -rf /home/mouap/hadoop/tmp /home/mouap/hadoop/tmpmoutest


chown -R user:group file_or_directory   -- change ownership of a directory and everything under it


mkdir xxx


file /bin/ls libhdfs.so   -- check whether a file is 32-bit or 64-bit


chown root *   -- change the owner (the first field)


chgrp root *   -- change the group (the second field)


mv wenjian ziliao   -- rename a file


chmod 777


find / -name network   -- search for a file by name (e.g. examples.jar)


tar zvxf eclipse-jee-luna-SR2-linux-gtk-x86_64.tar.gz   -- extract


update-alternatives --install /usr/bin/java java /usr/lib/jdk/jdk1.8.0_45/bin/java 300


update-alternatives --install /usr/bin/javac javac /usr/lib/jdk/jdk1.8.0_45/bin/javac 300




cd /etc   -- edit the hosts file in this directory


/usr/lib/jdk/jdk1.8.0_45/bin/jps   -- list Java processes
Format the HDFS filesystem (run from the mouap/hadoop/ directory):
bin/hdfs namenode -format
bin/hadoop dfs -mkdir /user/root   -- create the directory first, then run the next command to upload
bin/hadoop dfs -put input /user/root/in   -- upload the files
bin/hadoop dfsadmin -safemode leave   -- run this if the upload fails because of safe mode
bin/hadoop dfs -ls /in   -- list the files
bin/hadoop jar /usr/app/hadoop-eclipse-plugin/build/contrib/eclipse-plugin/lib/hadoop-mapreduce-examples-2.5.2.jar  wordcount in out   -- run the bundled example program
bin/hadoop jar /home/mouap/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.5.2.jar  wordcount in out   -- run the bundled example program
bin/hadoop jar /home/mouap/mouapTest/WorldCount_studentAvgSort.jar  /user/root/instudent /user/root/outstudent   -- run the jar exported from Eclipse


bin/hadoop dfs -cat out/*   -- print the job output
bin/hadoop dfs -cat /user/root/out/*   -- print the job output
bin/hadoop fs -rm -R /user/root/out   -- delete the out directory


javac -classpath ~/usr/app/hadoop-eclipse-plugin/build/contrib/eclipse-plugin/lib -d . WorldCount2_5_2.java






Start the services (from /home/mouap/hadoop/sbin):
[root@nameNode sbin]# ./start-dfs.sh
[root@nameNode sbin]# ./start-yarn.sh


Stop the services:
/home/mouap/hadoop/sbin# ./stop-all.sh




Download the hadoop-eclipse plugin: https://github.com/winghc/hadoop2x-eclipse-plugin


Download ant: http://ant.apache.org/bindownload.cgi
cd /usr/bin; ln -s -f /usr/app/ant/bin/ant   -- symlink ant onto the PATH




ANT_HOME=/usr/app/ant
PATH=$JAVA_HOME/bin:$ANT_HOME/bin:$PATH




ant jar -Dversion=2.5.2 -Declipse.home=/usr/app/eclipse/eclipse -Dhadoop.home=/home/hadoop/hadoop-2.5.2


$ANT_HOME/bin/ant jar -Dversion=2.5.2 -Declipse.home=/usr/app/eclipse/eclipse -Dhadoop.home=/home/mouap/hadoop


ant jar -Dversion=2.5.2 -Declipse.home=/usr/app/eclipse/eclipse -Dhadoop.home=/home/mouap/hadoop


On success, ant reports: Building jar: /usr/app/hadoop-eclipse-plugin/build/contrib/eclipse-plugin/hadoop-eclipse-plugin-2.5.2.jar


/home/mouap/mouapTest/WorldCount_studentAvgSort.jar




