Overview
Two full days of debugging, and it finally runs: a MapReduce job that pulls records out of Oracle into HDFS via OracleDataDrivenDBInputFormat. The three classes below are the whole job.
MapperOracle.java
package com.uucun.hadooporacle;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class MapperOracle extends Mapper<LongWritable, MyDbWriteble, Text, NullWritable> {

    final Logger logger = LoggerFactory.getLogger(MapperOracle.class);

    // Each input value already carries every row of one DB split (see
    // MyDbWriteble.readFields below), so the mapper just emits it as text.
    @Override
    protected void map(LongWritable key, MyDbWriteble value, Context context)
            throws IOException, InterruptedException {
        context.write(new Text(value.toString()), NullWritable.get());
    }
}
MyDbWriteble.java
package com.uucun.hadooporacle;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;

public class MyDbWriteble implements DBWritable, WritableComparable<MyDbWriteble> {

    private List<DownloadAd> list = new ArrayList<DownloadAd>();

    class DownloadAd {
        private int id;
        private String session_id;
        private int resource_id;
        private Date visit_date;

        public int getId() {
            return id;
        }
        public void setId(int id) {
            this.id = id;
        }
        public String getSession_id() {
            return session_id;
        }
        public void setSession_id(String session_id) {
            this.session_id = session_id;
        }
        public int getResource_id() {
            return resource_id;
        }
        public void setResource_id(int resource_id) {
            this.resource_id = resource_id;
        }
        public Date getVisit_date() {
            return visit_date;
        }
        public void setVisit_date(Date visit_date) {
            this.visit_date = visit_date;
        }
    }

    @Override
    public void write(PreparedStatement statement) throws SQLException {
        // This job only reads from the database, so nothing to do here.
    }

    @Override
    public void readFields(ResultSet resultSet) throws SQLException {
        // The record reader has already called resultSet.next() before invoking
        // this method, so the cursor sits on the first row of the split. The
        // do-while (instead of the usual while(next()) loop) keeps that first
        // record from being skipped; see the note at the end of the post.
        DownloadAd downloadAd;
        do {
            downloadAd = new DownloadAd();
            downloadAd.id = resultSet.getInt("id");
            downloadAd.session_id = resultSet.getString("session_id");
            downloadAd.resource_id = resultSet.getInt("resource_id");
            downloadAd.visit_date = resultSet.getDate("visit_date");
            list.add(downloadAd);
        } while (resultSet.next());
    }

    @Override
    public String toString() {
        StringBuilder builder = new StringBuilder();
        for (DownloadAd downloadAd : list) {
            builder.append(downloadAd.session_id);
            builder.append(",");
            builder.append(downloadAd.resource_id);
            builder.append("\n");
        }
        // Drop the newline appended after the last record.
        builder.deleteCharAt(builder.lastIndexOf("\n"));
        return builder.toString();
    }

    @Override
    public void write(DataOutput out) throws IOException {
        // Not needed: values go straight from the mapper to text output.
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        // Not needed for this job.
    }

    @Override
    public int compareTo(MyDbWriteble o) {
        return 0;
    }

    public List<DownloadAd> getList() {
        return list;
    }

    public void setList(List<DownloadAd> list) {
        this.list = list;
    }
}
RunTool.java
package com.uucun.hadooporacle;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.OracleDataDrivenDBInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import com.uucun.util.hadooputil.PluginUtil;

class RunTool extends PluginUtil {

    public static void main(String[] args) throws Throwable {
        System.setProperty("HADOOP_USER_NAME", "hadoop");

        final JobConf conf = new JobConf();
        System.out.println(args[0]);
        conf.set("fs.default.name", "hdfs://nn:9000");
        conf.set("mapred.job.tracker", "nn:9001");
        conf.set("dfs.permissions", "false");
        conf.setInt("mapred.map.tasks", 4);

        // Splits are ordered and ranged on the primary key.
        conf.set(DBConfiguration.INPUT_ORDER_BY_PROPERTY, "t.id");
        DBConfiguration.configureDB(conf, "oracle.jdbc.driver.OracleDriver",
                "jdbc:oracle:thin:@192.168.4.73:1521:XE", "oracle", "oracle");

        String jarName = runonhadoop().toString();
        conf.setJar(jarName);

        Job job = new Job(conf, "hadoopOracle");
        job.setMapperClass(MapperOracle.class);
        job.setInputFormatClass(OracleDataDrivenDBInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setJarByClass(RunTool.class);

        // The literal $CONDITIONS placeholder is mandatory: the input format
        // replaces it with a per-split range predicate at runtime.
        String queryStr = "select t.id,t.session_id,t.resource_id,t.visit_date"
                + " from tbl_res_download_succeed_log t"
                + " where t.visit_date>to_date('2013-05-23','yyyy-MM-dd HH24:mi:ss')"
                + " and t.visit_date<=to_date('2013-05-25','yyyy-MM-dd HH24:mi:ss')"
                + " and $CONDITIONS";
        // The bounding query returns the min/max of the split column, from
        // which the per-split ranges are computed.
        String boundStr = "select min(id),max(id) from tbl_res_download_succeed_log"
                + " where visit_date>to_date('2013-05-23','yyyy-MM-dd HH24:mi:ss')"
                + " and visit_date<=to_date('2013-05-25','yyyy-MM-dd HH24:mi:ss')";

        TextOutputFormat.setOutputPath(job, new Path(args[1]));
        OracleDataDrivenDBInputFormat.setInput(job, MyDbWriteble.class, queryStr, boundStr);

        // Clear a stale output directory left over from a previous run.
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(new Path(args[1]))) {
            fs.delete(new Path(args[1]), true);
            fs.close();
        }

        job.waitForCompletion(true);
    }
}
Pay special attention to $CONDITIONS: that one little token is what cost me the two days of debugging. Also note readFields(ResultSet) in the DBWritable interface: by the time it is called, the ResultSet is already positioned on the first record, so you must not open with the conventional next() call, or one record goes missing.
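To make the readFields pitfall concrete, here is a minimal sketch of the two loop shapes (the DownloadAd row class and column names follow the code above; the WRONG variant is illustrative, not from the original post):

    // Sketch: why the loop must be do-while. The record reader calls
    // resultSet.next() once before handing the ResultSet to readFields(),
    // so the cursor already sits on the first row of the split.
    @Override
    public void readFields(ResultSet resultSet) throws SQLException {
        // WRONG - the conventional loop advances past the row the cursor
        // is already on, silently dropping the first record of the split:
        //     while (resultSet.next()) { ... }

        // RIGHT - consume the current row first, then advance:
        do {
            DownloadAd row = new DownloadAd();
            row.setId(resultSet.getInt("id"));
            row.setSession_id(resultSet.getString("session_id"));
            row.setResource_id(resultSet.getInt("resource_id"));
            row.setVisit_date(resultSet.getDate("visit_date"));
            list.add(row);
        } while (resultSet.next());
    }

As for $CONDITIONS: DataDrivenDBInputFormat searches the query string for that literal token and substitutes a per-split range predicate derived from the bounding query, so with four map tasks one split's query would end in something roughly like "and ( t.id >= 1 ) AND ( t.id < 25000 )". Leave the token out and the input format has nowhere to inject the split condition.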
Reposted from: https://my.oschina.net/caolinsheng/blog/279447