概述
效果展示
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-YYZMeccW-1640344966005)(/Users/lifuwei/Library/Application Support/typora-user-images/image-20211224170025068.png)]
我的大致思路是
1、写了一个udf函数,这个函数的作用就是把对应的ip转换成省份,这个主要参考的是老师给的ip.txt(里面有很多ip所对应的地址)
2、把这个udf函数放到hive中,然后对ip进行解析,然后对解析出来的ip进行分组,并count()
3、然后用脚本实时查询这个结果并把这个结果放到mysql数据中
4、通过azkaban定时的去调度这个脚本,实现实时功能
遇到的困难点
1、写ip地址的转换
2、为了进行实时的更新不受原数据的影响,我进行了hive中的查询结果的show先删除再进行建立,sqoop在导入数据到mysql中的show表的时候,我用的是直接覆盖原数据。
3、在进行动态进行的展示的Spring MVC页面中,如何实现动图的效果,(这里采用的是和老师上课的效果相一致,把一个list分成两个list,然后就可以实现动态的展示)
Spring MVC中的代码
dao层(有两个)
package com.qf.bigdata.dao;
import com.qf.bigdata.pojo.Show;
import java.util.List;
public interface ShowMapper {
List<Show> select();
}
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="com.qf.bigdata.dao.ShowMapper">
<!-- 查询 -->
<select id="select" resultType="com.qf.bigdata.pojo.Show">
SELECT province,cnt FROM ` show`
</select>
</mapper>
pojo
package com.qf.bigdata.pojo;
public class Show {
private String province;
private Integer cnt;
public Show() {
}
public Show(String province, Integer cnt) {
this.province = province;
this.cnt = cnt;
}
public String getProvince() {
return province;
}
public void setProvince(String province) {
this.province = province;
}
public Integer getCnt() {
return cnt;
}
public void setCnt(Integer cnt) {
this.cnt = cnt;
}
@Override
public String toString() {
return "Show{" +
"province='" + province + ''' +
", cnt=" + cnt +
'}';
}
}
package com.qf.bigdata.pojo;
import java.util.List;
public class ShowVo {
private List<String> province;
private List<Integer> cnt;
public List<String> getProvince() {
return province;
}
public void setProvince(List<String> province) {
this.province = province;
}
public List<Integer> getCnt() {
return cnt;
}
public void setCnt(List<Integer> cnt) {
this.cnt = cnt;
}
public ShowVo() {
}
public ShowVo(List<String> province, List<Integer> cnt) {
this.province = province;
this.cnt = cnt;
}
@Override
public String toString() {
return "ShowVo{" +
"province=" + province +
", cnt=" + cnt +
'}';
}
}
service层
package com.qf.bigdata.service;
import com.qf.bigdata.pojo.ShowVo;
public interface ShowService {
ShowVo select();
}
package com.qf.bigdata.service.impl;
import com.qf.bigdata.dao.ShowMapper;
import com.qf.bigdata.pojo.Show;
import com.qf.bigdata.pojo.ShowVo;
import com.qf.bigdata.service.ShowService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.List;
@Service
public class ShowServiceImpl implements ShowService {
@Autowired
private ShowMapper showMapper;
@Override
public ShowVo select() {
//1、查询结果
List<Show> list = showMapper.select();
//2、重新封装结果到Vo中
ShowVo vo = new ShowVo();
ArrayList<String> province = new ArrayList<>();
ArrayList<Integer> cnts = new ArrayList<>();
for (Show show : list) {
province.add(show.getProvince());
cnts.add(show.getCnt());
}
vo.setProvince(province);
vo.setCnt(cnts);
return vo;
}
}
Controller层
package com.qf.bigdata.web.servlet;
import com.qf.bigdata.pojo.ShowVo;
import com.qf.bigdata.service.ShowService;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.ResponseBody;
@Controller
@RequestMapping("/show")
public class ShowController {
@Autowired
private ShowService showService;
@RequestMapping("/select")
@ResponseBody
public ShowVo select(){
return showService.select();
}
}
jsp
<%@ page contentType="text/html;charset=UTF-8" language="java" %>
<script src="jquery/jquery-1.8.3.min.js"></script>
<script src="jquery/echarts.min.js"></script>
<html>
<head>
<title>echarts</title>
</head>
<body>
<div id="main" style="width: 1200px; height: 800px; color:red"></div>
<button id="_btn">click me</button>
</body>
<script type="text/javascript">
/* 当页面一加载的时候就会调用此函数 */
$(function(){
$("#main").hide()
/* 使用javascript的方式获取到main的对象 */
var dom = document.getElementById("main") // $("#main")
/* 初始化一个echarts对象 */
var myChart = echarts.init(dom);
$("#_btn").click(function () {
// 发送ajax异步请求
// 发送ajax异步请求
$.ajax({
url:"http://localhost:8080/show/select.action",
success: function (vo) {
/* 我们静态渲染柱状图的数据 */
var option = {
xAxis: {
type: 'category',
data: vo.province
},
yAxis: {
type: 'value'
},
series: [
{
data: vo.cnt,
type: 'bar'
}
]
};
/* 将数据设置echarts实例 */
myChart.setOption(option)
$("#main").fadeIn("slow")
}
})
})
})
</script>
</html>
udf函数代码
package com.qf.bigdata
import org.apache.hadoop.hive.ql.exec.UDF
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.rdd.RDD
/*
* 解析ip的主函数
* */
object Demo1 extends UDF {
def evaluate(ip:String) : String = {
val sc = new SparkContext(new SparkConf().setAppName("Demo1").setMaster("local[*]"))
val ipLogRDD: RDD[String] = sc.textFile("src/main/resources/ip.txt")
val province2IPRDD: RDD[(String, Long, Long)] = ipLogRDD.map(log => {
val fields: Array[String] = log.split("\|")
val startIP: Long = IPUtils.ip2Long(fields(0))
val endIP: Long = IPUtils.ip2Long(fields(1))
val province: String = fields(6)
(province, startIP, endIP)
})
//对ip进行排序
val ipArr: Array[(String, Long, Long)] = province2IPRDD.collect().sortWith {
case ((province1, startIP1, endIP1), (province2, startIP2, endIP2)) => {
startIP1 < startIP2
}
}
if(ip.isEmpty && ip==null){
return "浙江";
}else{
val provinceIP: String = IPUtils.ip2address(ip, ipArr)
provinceIP
}
}
/*def main(args: Array[String]): Unit = {
val province: String = evaluate("182.89.118.170")
println(province)
}*/
}
package com.qf.bigdata
/**
* ip地址的转换
*/
object IPUtils {
def ip2address(ip:String, ipDataBase:Array[(String, Long, Long)]):String = {
//1. 转换为十进制数字
val ipNum: Long = ip2Long(ip)
//2. 解决ipNum是否存在于ipDataBase中,如果存在,那么在ipDataBase的哪个index
val index = binarySearch(ipNum, ipDataBase)
//3. 获取到地址
if(index != -1) return ipDataBase(index)._1
else return ""
}
def ip2Long(ip:String):Long = {
//1. 切割ip字符串
val fields: Array[String] = ip.split("\.")
//2. 遍历数组就能获取每个元素
var ipNum = 0L
for (field <- fields) {
ipNum = field.toLong | ipNum << 8
}
ipNum
}
/**
* 二分查找法,查找ip对应的索引
*/
def binarySearch(ip:Long, ipDataBase:Array[(String, Long, Long)]): Int = {
var start = 0
var end = ipDataBase.length - 1
while (start <= end) {
val mid = (start + end) / 2 // 中间的索引
val startIp = ipDataBase(mid)._2
val endIp = ipDataBase(mid)._3
if (ip >= startIp && ip <= endIp) return mid // 说明正好在范围内,直接返回mid
else if (ip < startIp) end = mid - 1
else start = mid + 1
}
return -1
}
}
脚本
init.hql
-- 导入jar
add jar /data/jars/ipparse.jar;
create temporary function `ipparse` as 'com.qf.bigdata.Demo1';
-- 先删除show表,为了进行数据更新
drop table if exists `ods_news`.`show`
-- 在hive中创建show表
create table if not exists `ods_news`.`show`(
province string,
cnt int
)
row format delimited fields terminated by '