概述
最近在做大数据相关项目,大数据平台编程一般用JAVA,但由于项目组中成员用C#较熟,因此,准备做两者连接HIVE读取数据的性能比较测试,如果相差较大,就考虑整个项目用JAVA开发,这样学习成本较高。比较后发现,两者性能差距不大,这样至少前端的开发可以利用C#的原有资源了。好了,废话不多说,上代码。
- C#通过ODBC连接HIVE读取数据
C#连接HIVE使用 MapR Hive ODBC Connector,该软件的下载地址为 http://archive.mapr.com/tools/MapR-ODBC/MapR_Hive/ 。
1.安装完成后,打开 "ODBC Data Source Administrator",新建 ODBC 连接,选择 "MapR Hive ODBC Connector"
2.DSN 重要参数配置如下图:
数据源名:自己定义一个
(1)host: hiveServer2 服务器的IP地址
(2)Port: hive Server 监听的服务端口,默认为10000,可以在 hive-site.xml 配置文件中查到
(3)hive server type:hiveserver2
(4)Authentication:如上图选择按 "user name" 访问的方式,并输入用户名
(5)点击 "Test",如下图显示连接成功,则OK
配置完成并测试成功后,编写测试程序(此处为 WinForms 窗体中的按钮点击事件):
/// <summary>
/// Runs a row-count query against Hive through the ODBC data source, binds the
/// result to the grid, and shows the elapsed query time in the text box.
/// </summary>
/// <param name="sender">The control that raised the click event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnConnectHive_Click(object sender, EventArgs e)
{
    // ODBC connection string; it selects the data source named MYHIVE.
    string dns = "DSN=MYHIVE;UID=hive;PWD=";
    string sql = "select count(*) from wgmes.tq_zbs_if";

    // Time only the query itself (connect + execute + fill).
    System.Diagnostics.Stopwatch stopwatch = System.Diagnostics.Stopwatch.StartNew();
    DataSet dsHivedata = HiveOdbcClient.Current.Query(dns, sql);
    stopwatch.Stop();

    TimeSpan elapsed = stopwatch.Elapsed;
    this.dgvHiveData.DataSource = dsHivedata.Tables[0];

    // Each value is the TOTAL elapsed time expressed in that unit, one per line.
    // The separator must be a real line break; a bare "rn" would be printed literally.
    this.txtBTime.Text =
        elapsed.TotalHours.ToString() + "h" + Environment.NewLine +
        elapsed.TotalMinutes.ToString() + "m" + Environment.NewLine +
        elapsed.TotalSeconds.ToString() + "s" + Environment.NewLine +
        elapsed.TotalMilliseconds.ToString() + "ms";
}
其中,wgmes是数据库名,表tq_zbs_if是一个包含图片的11万条数据的表。
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Data.Odbc;
using System.Data;
namespace hiveform
{
/// <summary>
/// Small helper that runs SQL statements against Hive through an ODBC connection.
/// Every call opens its own connection and releases it before returning.
/// </summary>
public class HiveOdbcClient
{
    /// <summary>
    /// Gets a client instance. A new instance is created on every access.
    /// </summary>
    public static HiveOdbcClient Current
    {
        get { return new HiveOdbcClient(); }
    }

    /// <summary>
    /// Executes a statement that does not return rows.
    /// </summary>
    /// <param name="dns">ODBC connection string passed to <see cref="OdbcConnection"/>.</param>
    /// <param name="sql">The SQL text to execute.</param>
    public void ExcuteNoQuery(string dns, string sql)
    {
        // "using" disposes the command and connection even when an exception is thrown,
        // and lets the exception propagate with its original stack trace.
        using (OdbcConnection conn = new OdbcConnection(dns))
        using (OdbcCommand cmd = new OdbcCommand(sql, conn))
        {
            conn.Open();
            cmd.ExecuteNonQuery();
        }
    }

    /// <summary>
    /// Executes a query and returns its rows in a <see cref="DataSet"/>.
    /// </summary>
    /// <param name="dns">ODBC connection string passed to <see cref="OdbcConnection"/>.</param>
    /// <param name="sql">The SQL query text.</param>
    /// <param name="tblName">Name given to the table that receives the rows inside the returned set.</param>
    /// <returns>A data set filled with the query result under <paramref name="tblName"/>.</returns>
    public DataSet Query(string dns, string sql, string tblName = "tbl")
    {
        DataSet set = new DataSet();

        using (OdbcConnection conn = new OdbcConnection(dns))
        using (OdbcCommand cmd = conn.CreateCommand())
        using (OdbcDataAdapter adapter = new OdbcDataAdapter(cmd))
        {
            conn.Open();
            cmd.CommandText = sql;
            adapter.Fill(set, tblName);
        }

        return set;
    }
}
}
执行结果如下:
- Java JDBC连接Hive
至于网上转载的资料说需要一大堆其他的Jar包,其实都已被hive-jdbc-2.0.1-standalone.jar集成了。注意,hive-jdbc-2.0.1-standalone.jar的版本号要与所连接的Hive版本对应:第一次下载了最新的2.1.1版本,结果报错说连接无法解析。代码如下:
package testHive;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
/**
 * Connects to HiveServer2 over JDBC, runs a row-count query against one table
 * and prints the result together with the elapsed time.
 *
 * The remaining private helpers (select, load, describe, show, create, drop)
 * are sample operations that are not invoked by {@link #main(String[])}.
 */
public class testHive {
    private static final String DRIVER_NAME = "org.apache.hive.jdbc.HiveDriver";
    private static final String URL = "jdbc:hive2://10.0.83.21:10000/default";
    private static final String USER = "hive";
    private static final String PASSWORD = "";

    /**
     * Entry point: opens the connection, times the count query and prints the outcome.
     * Exits with status 1 when the driver cannot be loaded or a SQL error occurs.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // try-with-resources closes the statement first and the connection second,
        // and does so before the catch block runs, so System.exit cannot skip cleanup.
        try (Connection conn = getConn();
             Statement stmt = conn.createStatement()) {
            String tableName = "wgmes.tq_zbs_if";

            // Run the regular Hive count query and time it.
            long startTime = System.currentTimeMillis();
            String result = countData(stmt, tableName);
            long endTime = System.currentTimeMillis();

            // Keep the fractional part; integer division would truncate sub-second timings.
            double elapsedSeconds = (endTime - startTime) / 1000.0;

            System.out.println("执行“regular hive query”运行结果:");
            System.out.println("count ------>" + result + System.lineSeparator()
                    + "TimeCost=" + elapsedSeconds + "s");
        } catch (ClassNotFoundException | SQLException e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    /**
     * Counts the rows of a table.
     *
     * @param stmt      open statement used to run the query
     * @param tableName table to count
     * @return the first column of the first result row as a string, or "" when there is no row
     * @throws SQLException if the query fails
     */
    private static String countData(Statement stmt, String tableName)
            throws SQLException {
        String sql = "select count(*) from " + tableName;
        System.out.println("Running:" + sql);
        try (ResultSet res = stmt.executeQuery(sql)) {
            return res.next() ? res.getString(1) : "";
        }
    }

    /**
     * Prints the first two columns of every row of a table, tab-separated.
     *
     * @param stmt      open statement used to run the query
     * @param tableName table to read
     * @throws SQLException if the query fails
     */
    private static void selectData(Statement stmt, String tableName)
            throws SQLException {
        String sql = "select * from " + tableName;
        System.out.println("Running:" + sql);
        try (ResultSet res = stmt.executeQuery(sql)) {
            System.out.println("执行 select * query 运行结果:");
            while (res.next()) {
                System.out.println(res.getInt(1) + "\t" + res.getString(2));
            }
        }
    }

    /**
     * Loads the local path /home/hadoop01/data into a table.
     *
     * @param stmt      open statement used to run the command
     * @param tableName target table
     * @throws SQLException if the command fails
     */
    private static void loadData(Statement stmt, String tableName)
            throws SQLException {
        String filepath = "/home/hadoop01/data";
        String sql = "load data local inpath '" + filepath + "' into table "
                + tableName;
        System.out.println("Running:" + sql);
        // LOAD DATA produces no result set, so execute() is used instead of executeQuery().
        stmt.execute(sql);
    }

    /**
     * Prints the first two columns of the "describe" output for a table, tab-separated.
     *
     * @param stmt      open statement used to run the command
     * @param tableName table to describe
     * @throws SQLException if the command fails
     */
    private static void describeTables(Statement stmt, String tableName)
            throws SQLException {
        String sql = "describe " + tableName;
        System.out.println("Running:" + sql);
        try (ResultSet res = stmt.executeQuery(sql)) {
            System.out.println("执行 describe table 运行结果:");
            while (res.next()) {
                System.out.println(res.getString(1) + "\t" + res.getString(2));
            }
        }
    }

    /**
     * Runs "show tables" with the given name as pattern and prints the first match, if any.
     *
     * @param stmt      open statement used to run the command
     * @param tableName table name pattern
     * @throws SQLException if the command fails
     */
    private static void showTables(Statement stmt, String tableName)
            throws SQLException {
        String sql = "show tables '" + tableName + "'";
        System.out.println("Running:" + sql);
        try (ResultSet res = stmt.executeQuery(sql)) {
            System.out.println("执行 show tables 运行结果:");
            if (res.next()) {
                System.out.println(res.getString(1));
            }
        }
    }

    /**
     * Creates a two-column (key int, value string) tab-delimited table.
     *
     * @param stmt      open statement used to run the command
     * @param tableName name of the table to create
     * @throws SQLException if the command fails
     */
    private static void createTable(Statement stmt, String tableName)
            throws SQLException {
        // The field delimiter is sent to Hive as the two-character escape \t (tab).
        String sql = "create table "
                + tableName
                + " (key int, value string) row format delimited fields terminated by '\\t'";
        // DDL produces no result set, so execute() is used instead of executeQuery().
        stmt.execute(sql);
    }

    /**
     * Drops a table.
     *
     * @param stmt      open statement used to run the command
     * @param tableName name of the table to drop
     * @return the name of the dropped table
     * @throws SQLException if the command fails
     */
    private static String dropTable(Statement stmt, String tableName) throws SQLException {
        String sql = "drop table " + tableName;
        // DDL produces no result set, so execute() is used instead of executeQuery().
        stmt.execute(sql);
        return tableName;
    }

    /**
     * Loads the Hive JDBC driver and opens a connection with the configured URL and credentials.
     *
     * @return an open connection; the caller is responsible for closing it
     * @throws ClassNotFoundException if the driver class is not on the classpath
     * @throws SQLException           if the connection cannot be established
     */
    private static Connection getConn() throws ClassNotFoundException,
            SQLException {
        Class.forName(DRIVER_NAME);
        return DriverManager.getConnection(URL, USER, PASSWORD);
    }
}
执行结果如下图:
最后
以上就是过时棉花糖为你收集整理的.NET ODBC与JAVA JDBC连接HIVE查询数据的性能比较的全部内容,希望文章能够帮你解决.NET ODBC与JAVA JDBC连接HIVE查询数据的性能比较所遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
发表评论 取消回复