Overview
1. Required jar packages (judging from the imports in the code below: the sqoop client, hive-jdbc, mysql-connector-java, hadoop-common, spring, commons-lang3 and log4j)
2. Some required parameters are defined in message.properties:
jdbcHiveUrl=jdbc:hive2://10.1.9.91:10000 //hive JDBC url
jdbcHiveDriver=org.apache.hive.jdbc.HiveDriver //hive driver
jdbc_mysql_driver=com.mysql.jdbc.Driver //MySQL driver
hiveUser=hive //hive username
hivePwd=123456 //hive password
hiveType=star
hiveDbName=default
mapred.reduce.tasks=1
sqoopServerUrl=http://10.1.9.91:12000/sqoop/ //sqoop server address
##polling interval time init(ms)
polling_interval_time = 86400000 //polling interval, see the previous post
#Project start day hour:min:second
polling_start_time = 10:54:10 //scheduled start time
outputFormat=TEXT_FILE
storageType=HDFS
sqoopOutput = /user/outputHive/
HDFSUrl =
export_target_database_url=jdbc:mysql://10.1.35.13:3306/tsinghuadaxue //MySQL database url
export_target_database_username=root //MySQL username
export_target_database_password=root //MySQL password
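These values are read in the code through the Messages helper in com.scheduler.util, whose implementation is not shown in this post. A minimal sketch, assuming it simply wraps a ResourceBundle over message.properties, could look like this:

package com.scheduler.util;

import java.util.MissingResourceException;
import java.util.ResourceBundle;

// Hypothetical sketch only; the real Messages class is not part of this post.
public class Messages {
    // loads message.properties from the classpath
    private static final ResourceBundle BUNDLE = ResourceBundle.getBundle("message");

    private Messages() {
    }

    public static String getString(String key) {
        try {
            return BUNDLE.getString(key);
        } catch (MissingResourceException e) {
            // make a missing key visible instead of failing
            return '!' + key + '!';
        }
    }
}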
3. Create and implement three services: DataSynMysqlAndHiveService, HiveService and JDBCService. A sketch of the main interface is given below, followed by the implementation DataSynMysqlAndHiveImpl.
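The interface definitions themselves are not included in the post; a minimal sketch of DataSynMysqlAndHive, inferred from the @Override methods of the implementation below, would be:

package com.scheduler.service;

// Hypothetical sketch inferred from the implementation below; the real interface is not shown in this post.
public interface DataSynMysqlAndHive {
    // export newly created rows of a Hive table to MySQL; returns "success" or "error"
    String exportHiveData(String tableName);

    // import a MySQL table into Hive through a sqoop job; returns "success" or "error"
    String importHiveData(String sourceTableName);
}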
package com.scheduler.service.impl;
import java.io.IOException;
import java.sql.*;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.Date;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.Logger;
import org.apache.sqoop.client.SqoopClient;
import org.apache.sqoop.client.SubmissionCallback;
import org.apache.sqoop.model.*;
import org.apache.sqoop.submission.SubmissionStatus;
import org.apache.sqoop.submission.counter.Counter;
import org.apache.sqoop.submission.counter.CounterGroup;
import org.apache.sqoop.submission.counter.Counters;
import org.apache.sqoop.validation.Status;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import com.scheduler.service.DataSynMysqlAndHive;
import com.scheduler.service.HiveService;
import com.scheduler.service.JDBCService;
import com.scheduler.util.*;
@Service("DataSynMysqlAndHiveImpl")
public class DataSynMysqlAndHiveImpl implements DataSynMysqlAndHive {
protected Logger log = Logger.getLogger(DataSynMysqlAndHiveImpl.class);
private static String jdbcHiveDriver = Messages.getString("jdbcHiveDriver");
private static String jdbcHiveUrl = Messages.getString("jdbcHiveUrl");
private static String hiveUser = Messages.getString("hiveUser");
private static String hivePwd = Messages.getString("hivePwd");
private static String exportDatabase = Messages.getString("export_target_database_url");
private static String exportUsername = Messages.getString("export_target_database_username");
private static String exportPassword = Messages.getString("export_target_database_password");
private static String jdbcMysqlDriver = Messages.getString("jdbc_mysql_driver");
private static String pollingStartTime = Messages.getString("polling_start_time");
private static SimpleDateFormat yMd = new SimpleDateFormat("yyyy-MM-dd");
private static SimpleDateFormat yMdHms = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
private static Date polling_start_time = null; //polling start time
private static String sqoopServerUrl = Messages.getString("sqoopServerUrl");
private SqoopClient sqoopClient;// sqoop client instance
@Autowired
private JDBCService jdbcService; // JDBC service
@Autowired
private HiveService hfs;
@Override
public String exportHiveData(String tableName) {
String flag = "success";
try {
Class.forName(jdbcHiveDriver);
} catch (ClassNotFoundException e) {
flag = "error";
e.printStackTrace();
log.error("hive链接出错", e);
}
//get the current time and the time one day earlier
Date nowDate = new Date();
Calendar calendar = Calendar.getInstance();
calendar.setTime(nowDate);
calendar.add(Calendar.DAY_OF_MONTH, -1);
Date predate = calendar.getTime();
SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd");
String predateString = dateFormat.format(predate) + " " + pollingStartTime;
String nowdateString = dateFormat.format(nowDate) + " " + pollingStartTime;
String sql = "select * from " + tableName + " where resource_flag = 1 and create_time <= '" + nowdateString +"' and create_time >'" + predateString +"'";
log.info("sql:" + sql);
System.out.println("sql:" + sql);
try {
Connection con = DriverManager.getConnection(jdbcHiveUrl, hiveUser,
hivePwd);
java.sql.Statement stmt = con.createStatement();
ResultSet resultSet = stmt.executeQuery(sql);
if (resultSet.next()) {//only export when the hive query returns rows; otherwise there is nothing to update
String exportSql = generateExportSql(sql,tableName);
ResultSet set = stmt.executeQuery(exportSql);
System.out.println("export sql: "+exportSql);
if (set.next()) {
int result = set.getInt(1);
if (result == 1) {
flag = "error";
}
}
}
closeConnection(con, stmt, resultSet);
} catch (SQLException e) {
e.printStackTrace();
flag = "error";
}
return flag;
}
/**
 * <p>
 * Description: [build the export statement that writes the hive query result into MySQL via the dboutput UDF]
 * </p>
 * @param selectSql the hive query whose result should be exported
 * @param tableName the target MySQL table
 * @return the export statement
 */
private String generateExportSql(String selectSql, String tableName) {
//build the export sql that uses the dboutput udf
StringBuffer buffer = new StringBuffer();
buffer.append("select dboutput('");
buffer.append(exportDatabase);
buffer.append("','");
buffer.append(exportUsername);
buffer.append("','");
buffer.append(exportPassword);
buffer.append("','");
//database connection
Connection conn = null;
//result set holding the column metadata
ResultSet rs = null;
try {
//set the encoding
/*if (exportDatabase.contains("jdbc:mysql") && !exportDatabase.contains("characterEncoding")) {
exportDatabase = exportDatabase + "?characterEncoding=UTF-8";//force utf-8 encoding
}*/
//open the database connection
conn=getConnection(jdbcMysqlDriver, exportDatabase, exportUsername, exportPassword);
//read the column metadata of the target table
rs=conn.getMetaData().getColumns(null, null, tableName, null);
//iterate over all columns, skipping the id column
String columnNames = "";
String value = "";
while(rs.next()){
if (!StringUtils.equals("id", rs.getString("COLUMN_NAME"))) {
columnNames = columnNames + rs.getString("COLUMN_NAME") + ",";
value = value + "?,";
}
}
columnNames = columnNames.substring(0, columnNames.length()-1);
value = value.substring(0, value.length()-1);
String insertSql = "insert into " + tableName + "(" + columnNames +") values(" +value + ")";
buffer.append(insertSql+"',");
buffer.append(columnNames);
buffer.append(") from ");
buffer.append("("+selectSql.replace("*", columnNames)+")");
} catch (Exception e) {
e.printStackTrace();
}
closeConnection(conn, null, rs);
System.out.println("导出的sql为:"+buffer.toString());
return buffer.toString();
}
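/*
 * For illustration only (a hypothetical table "student" with columns name and age; the connection
 * values come from message.properties above): the statement produced by generateExportSql would
 * look roughly like
 *
 *   select dboutput('jdbc:mysql://10.1.35.13:3306/tsinghuadaxue','root','root',
 *       'insert into student(name,age) values(?,?)',name,age)
 *   from (select name,age from student where resource_flag = 1 and create_time <= '...' and create_time > '...')
 *
 * i.e. the dboutput UDF runs the insert once for every row returned by the inner hive query.
 */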
public void closeConnection(Connection connection, java.sql.Statement pStatement, ResultSet resultSet){
try {
if (resultSet != null) {
resultSet.close();
}
if (pStatement != null) {
pStatement.close();
}
if (connection != null) {
connection.close();
}
} catch (Exception e) {
e.printStackTrace();
}
}
public Connection getConnection(String driver, String url, String userName,
String password) {
//the connection to return
Connection connection = null;
//load the database driver
try {
Class.forName(driver);
} catch (ClassNotFoundException e) {
e.printStackTrace();
System.out.println("Failed to load the JDBC driver: " + driver);
}
//open the database connection
try {
Properties props =new Properties();
props.put("remarksReporting","true");
props.put("user", userName);
props.put("password", password);
connection = DriverManager.getConnection(url, props);
//connection = DriverManager.getConnection(url, userName, password);
} catch (SQLException e) {
e.printStackTrace();
}
return connection;
}
/**
 * <p>
 * Description: [import data from MySQL into hive]
 * </p>
 */
@Override
public String importHiveData(String sourceTableName) {
//check whether there is any new data to import
try {
Date nowTime = yMdHms.parse(yMdHms.format(new Date()));
//time one day earlier
String preDate = yMdHms.format(TimeHelper.dateAddDay(nowTime,-1));
//newest create_time later than that point; null means nothing new arrived
Timestamp aftTimestamp = getAfterMaxTimestamp(sourceTableName,preDate,"create_time");
if (null == aftTimestamp ){
return "no new data detected";
}
} catch (ParseException e) {
e.printStackTrace();
}
//flag that records whether an error occurs during the extraction
boolean hasError = false;
//1. initialize the sqoop client and obtain a sqoop connection
MConnection con =initSqoop();
//if the connection is null, log it and abort the task
if (con == null) {
System.out.print("sqoop connection is null");
return "error";
}
//2. create the sqoop job; the job type is IMPORT
MJob newjob = sqoopClient.newJob(con.getPersistenceId(),org.apache.sqoop.model.MJob.Type.IMPORT);
CallBack callback = new CallBack(sourceTableName);
//fetch the table information of the source table
List<TableVO> tableVOs = jdbcService.getTables(exportDatabase, exportUsername, exportPassword, null, null, sourceTableName, null);
//fetch the column information of the source table
List<ColumnVO> columnVOs = jdbcService.getColumns(exportDatabase, exportUsername, exportPassword, sourceTableName);
boolean isFirst = true;
String primaryKey = jdbcService.getPrimaryKey(exportDatabase,exportUsername,exportPassword,null,null,sourceTableName);
String hdfsFilePath= "";
hdfsFilePath=updateIncrementSqoopJob(newjob,sourceTableName,columnVOs);
//start a thread that limits how long the sqoop job may run (the second argument is in hours)
Thread thread = monitorRuntime(sqoopClient,3,newjob);
//record the job start time
long startTime = System.currentTimeMillis();
//start the sqoop import job and get back its final submission status
MSubmission submission = startSqoopTask(0,newjob,thread,callback);
//append the sqoop import-time column to the column list
columnVOs=addSqoopTimeColumn(columnVOs);
if (submission.getStatus().compareTo(SubmissionStatus.SUCCEEDED) == 0) {// the job succeeded, so write the data into hive
hasError=createOrcHiveAfterSqoop(sourceTableName,columnVOs, hdfsFilePath, startTime, startTime, false);
}
if (submission.getStatus().compareTo(SubmissionStatus.FAILED) == 0|| submission.getExceptionInfo() != null) {// the job failed: print the error and record it in the task log
System.out.println(submission.getExceptionInfo());
//on failure, write a log entry and delete the hdfs file
addLogCaseSqoopFail(submission,newjob,hdfsFilePath,thread);
//mark that an error occurred
hasError = true;
return "error";
}
//afterFinishTask(hasError);
return "success";
}
/**
 * <p>
 * Description: [initialize the sqoop client and obtain a sqoop connection]
 * </p>
 * @return MConnection the sqoop connection
 */
public MConnection initSqoop(){
//initialize the sqoop client
this.sqoopClient = new SqoopClient(sqoopServerUrl);
//create the sqoop connection for this data source and get its id
Long conId = createSqoopConnection("zheda",exportDatabase,exportUsername,exportPassword,jdbcMysqlDriver);
//look the connection up by its id
MConnection con =sqoopClient.getConnection(conId);
//return the connection
return con;
}
public long createSqoopConnection(String resourceName, String jdbcUrl,
String name, String passwd, String driver) {
SqoopClient sqoopClient = new SqoopClient(Messages.getString("sqoopServerUrl"));
MConnection newCon = sqoopClient.newConnection(1);
MConnectionForms conForms = newCon.getConnectorPart();
MConnectionForms frameworkForms = newCon.getFrameworkPart();
newCon.setName(resourceName);
conForms.getStringInput("connection.connectionString").setValue(jdbcUrl);// 数据库连接url字符串
conForms.getStringInput("connection.jdbcDriver").setValue(driver);// 数据库驱动
conForms.getStringInput("connection.username").setValue(name);// 数据库用户名
conForms.getStringInput("connection.password").setValue(passwd);// 数据库密码
frameworkForms.getIntegerInput("security.maxConnections").setValue(0);// sqoop的最大连接数
try {
Status status = sqoopClient.createConnection(newCon);
if (status.canProceed()) {
return newCon.getPersistenceId();
} else {
log.info("Check for status and forms error ");
System.out.println("Check for status and forms error ");
return -1;
}
} catch (Exception e) {
log.error("创建连接出错!:"+e.getMessage());
System.out.println(e.getMessage());
return -1;
}
}
/**
 * <p>
 * Description: [callback invoked as the sqoop job is submitted, updated and finished]
 * </p>
 */
// inner callback class for sqoop job execution
class CallBack implements SubmissionCallback {
private String tableName;
public String getTableName() {
return tableName;
}
public void setTableName(String tableName) {
this.tableName = tableName;
}
public CallBack() {
super();
}
public CallBack(String tableName){
super();
this.tableName= tableName;
}
@Override
public void submitted(MSubmission mSubmission) {
}
@Override
public void updated(MSubmission mSubmission) {
}
// called when the sqoop job has finished
@Override
public void finished(MSubmission arg0) {
}
}
/**
 * <p>
 * Description: [start a thread that monitors how long the sqoop job has been running and stops the job once the limit is exceeded]
 * </p>
 *
 * @param sqc the sqoop client
 * @param taskTime maximum run time of the job, in hours
 * @param sJob the sqoop job
 * @return Thread the monitoring thread
 */
public Thread monitorRuntime(SqoopClient sqc,int taskTime,final MJob sJob){
//determine the time limit; if none is specified, default to 20 hours
final int job_timeout_time = taskTime != 0 ? taskTime :20;
// start a thread that watches the sqoop job run time and stops the job once the limit is exceeded
Thread thread = new Thread(new Runnable() {
@Override
public void run() {
try {
//sleep for the maximum run time, then stop the sqoop job
Thread.sleep(job_timeout_time * 60 * 60 * 1000L);
sqoopClient.stopSubmission(sJob.getPersistenceId());
} catch (InterruptedException e) {
log.error("the sqoop full-import monitor thread was interrupted!",e);
}
}
});
thread.start();
//return the monitoring thread
return thread;
}
/**
 * <p>
 * Description: [after the sqoop job finishes, create the hive table from the source table's column information and the hdfs file path]
 * </p>
 *
 * @param tableName the table name
 * @param columnVOs the table columns
 * @param hdfsPath the hdfs file path
 * @return boolean whether the table was created successfully
 */
public boolean createHiveTable(String tableName,List<ColumnVO> columnVOs,String hdfsPath){
boolean hasError = false;
//assemble the create-table sql
StringBuffer createSql = new StringBuffer("create table " + tableName + "(");
for (int i = 0; i < columnVOs.size(); i++) {
if (i == 0) {
createSql.append("`" + columnVOs.get(i).getColumnName()+ "` string");
} else {
createSql.append(",`"+ columnVOs.get(i).getColumnName()+ "` string");
}
}
createSql.append(") ROW FORMAT DELIMITED FIELDS TERMINATED BY '