我是靠谱客的博主 心灵美水杯,最近开发中收集的这篇文章主要介绍hql题及答案,觉得挺不错的,现在分享给大家,希望可以做个参考。

概述

一点一点的成长


题目来源于网络,思路自己写的有用到hql语句,或者sql语句

-- 第一题:求:找出所有科目成绩都大于某一学科平均成绩的学生
/*
数据:1001	01	90
      1001	02	90
      1001	03	90
      1002	01	85
      1002	02	85
      1002	03	70
      1003	01	70
      1003	02	70
      1003	03	85
	  
表结构:uid,subject_id,score
*/
--(1) 建表:
create table score(
uid string,
subject_id string,
score int)
row format delimited field terminated by 't';


-- 求平均成绩
select uid,score,avg(score)  over(partition by subject_id) avg_score from score;t1
-- 求学生分数如果大于平局分数则flag=0 否则为1
select uid,if(score>avg_score,0,1) flag from t1;t2
-- 按照uid聚合,如果sum(flag)=0,则符合题意
select uid from t2 group by uid having sum(flag)=0;
--整合
select uid 
from (
select uid,
if(score>avg_score,0,1) flag 
from (
select uid,
score,avg(score) over(partition by subject_id) avg_score
from score)t1)t2 
group by uid 
having sum(flag)=0;




-- 第二题
/* 数据源
u01     2017/1/21       5
u02     2017/1/23       6
u03     2017/1/22       8
u04     2017/1/20       3
u01     2017/1/23       6
u01     2017/2/21       8
u02     2017/1/23       6
u01     2017/2/22       4
*/
-- 求每个用户每个月访问量,及累计量
/*要求使用SQL统计出每个用户每个月的累积访问次数,如下表所示:
用户id	月份	小计	累积
u01	2017-01	11	11
u01	2017-02	12	23
u02	2017-01	12	12
u03	2017-01	8	8
u04	2017-01	3	3
*/
-- 建表:
create table action
(userId string,
visitDate string,
visitCount int) 
row format delimited fields terminated by "t";


-- 给日期划分格式
select 
userId,
date_format(regexp_replace(visitDate,'/','-'),'yyyy-MM') mn,
visitCount
from action;t1

-- 计算单月访问
select userId,mn,sum(visitCount) mn_count from t1 group by userId,mn;t2

-- 计算累计访问
select 
userId,
mn,
mn_count,
sum(mn_count) over(partition by userId order by mn) 
from t2

-- 整合
select 
userId,
mn,
mn_count,
sum(mn_count) over(partition by userId order by mn) sum_count
from (
select 
userId,
mn,
sum(visitCount) mn_count 
from (
select 
userId,
date_format(regexp_replace(visitDate,'/','-'),'yyyy-MM') mn,
visitCount
from action
)t1
group by userId,mn
)t2

-- 方式二(使用sql语句)
-- 更改日期格式
select 
   userId,
   date_format(regexp_replace(visitDate,'/','-'),'yyyy-MM') mn,
   visitCount
from action;t1

-- 统计每个用户,每个月的访问量
select 
   userId,
   mn,
   sum(visitCount) mn_count
from t1
group by userId,mn;t2

-- 聚合两张表,并左边表的月份要大于等于右边表的,这样得到1对多关系
select t2.userId as 2_uid,t2.mn as 2_mn,t2.mn_count as 2_mn_count,
t3.userId as 3_uid ,t3.mn as 3_mn ,t3.mn_count as 3_mn_count
from t2
join t3
on t2.userId = t3.userId
where t2.mn>=t3.mn;t4

-- 在按照t2.mn分组就可以得到累积值了
select 
2_uid,
2_mn,
2_mn_count,
sum(3_mn_count) as sum_visite
from t4
group by 2_uid,2_mn,2_mn_count

-- 整合
select 
2_uid,
2_mn,
2_mn_count,
sum(3_mn_count) as sum_visite
from (select t2.userId as 2_uid,t2.mn as 2_mn,t2.mn_count as 2_mn_count,
t3.userId as 3_uid ,t3.mn as 3_mn ,t3.mn_count as 3_mn_count
from (select 
   userId,
   mn,
   sum(visitCount) mn_count
from (select 
   userId,
   date_format(regexp_replace(visitDate,'/','-'),'yyyy-MM') mn,
   visitCount
from action)t1
group by userId,mn)t2
join (select 
   userId,
   mn,
   sum(visitCount) mn_count
from (select 
   userId,
   date_format(regexp_replace(visitDate,'/','-'),'yyyy-MM') mn,
   visitCount
from action)t1
group by userId,mn)t3
on t2.userId = t3.userId
where t2.mn>=t3.mn)t4
group by 2_uid,2_mn,2_mn_count


-- 第三题

-- 第四题

-- 第五题
/*
有日志如下,请写出代码求得所有用户和活跃用户的总数及平均年龄。(活跃用户指连续两天都有访问记录的用户)日期 用户 年龄
数据集
2019-02-11,test_1,23
2019-02-11,test_2,19
2019-02-11,test_3,39
2019-02-11,test_1,23
2019-02-11,test_3,39
2019-02-11,test_1,23
2019-02-12,test_2,19
2019-02-13,test_1,23
2019-02-15,test_2,19
2019-02-16,test_2,19

*/
-- 建表:
create table user_age(dt string,user_id string,age int)row format delimited fields terminated by ',';

-- 求日活思路,1、按用户id与日期排名,2、在用行中的日期减排名,得到的值如果有两条及以上相同的说明是连续登录,例如以下数据进行日期减排名
2019-02-11	test_1	23	1
2019-02-13	test_1	23	2
2019-02-11	test_2	19	1
2019-02-12	test_2	19	2
2019-02-15	test_2	19	3
2019-02-16	test_2	19	4
2019-02-11	test_3	39	1

-- 按照日期以及用户分组,按照日期排序并给出排名
select
   dt,
   user_id,
   min(age) age,
   rank() over(partition by user_id order by dt) rk
from
   user_age
group by
   dt,user_id;t1

-- 计算日期及排名的差值
select
   user_id,
   age,
   date_sub(dt,rk) flag
from
   t1;t2
   
-- 过滤出差值大于等于2的,即为连续两天活跃的用户
select
   user_id,
   min(age) age
from
   t2
group by
   user_id,flag
having
   count(*)>=2;t3 
   
-- 1、求活跃用户,整合
select
   user_id,
   min(age) age
from
   (select
   user_id,
   age,
   date_sub(dt,rk) flag
from
   (select
   dt,
   user_id,
   min(age) age,
   rank() over(partition by user_id order by dt) rk
from
   user_age
group by
   dt,user_id)t1)t2
group by
   user_id,flag
having
   count(*)>=2;

-- 对日活进行去重(例如:a用户在1月10号1月11号以及1月20号和1月21号4天登录。)
select 
    distinct(user_id) user_id,
    age 
from 
    (select
   user_id,
   min(age) age
from
   (select
   user_id,
   age,
   date_sub(dt,rk) flag
from
   (select
   dt,
   user_id,
   min(age) age,
   rank() over(partition by user_id order by dt) rk
from
   user_age
group by
   dt,user_id)t1)t2
group by
   user_id,flag
having
   count(*)>=2)t3;t4


-- 计算活跃用户(两天连续有访问)的人数以及平均年龄
select 
    count(user_id) twice_count,
    avg(age) twice_avg
from
    (select 
    distinct(user_id) user_id,
    age 
from 
    (select
   user_id,
   min(age) age
from
   (select
   user_id,
   age,
   date_sub(dt,rk) flag
from
   (select
   dt,
   user_id,
   min(age) age,
   rank() over(partition by user_id order by dt) rk
from
   user_age
group by
   dt,user_id)t1)t2
group by
   user_id,flag
having
   count(*)>=2)t3)t4;t5
   
-- 求所有用户的总数,以及平均年龄
-- 对用户进行去重
select 
    distinct(user_id),
    age
from 
    user_age;t6

-- 求总数,以及平均年龄
select
     count(*),
     avg(age)
from 
     t6;t7

-- 整合
select
     count(*) sum_count,
     avg(age) sum_age
from 
     (select 
    distinct(user_id),
    age
from 
    user_age)t6;t7

-- 大整合
--所有用户和活跃用户的总数及平均年龄
-- union 两个表
select 
    sum_count,
    sum_age,
    0,
    0
from 
    (select
     count(*) sum_count,
     avg(age) sum_age
from 
     (select 
    distinct(user_id),
    age
from 
    user_age)t6)t7
union
select
    0,
    0,
    twice_count,
    twice_avg
from 
    (select 
    count(user_id) twice_count,
    avg(age) twice_avg
from
    (select 
    distinct(user_id) user_id,
    age 
from 
    (select
   user_id,
   min(age) age
from
   (select
   user_id,
   age,
   date_sub(dt,rk) flag
from
   (select
   dt,
   user_id,
   min(age) age,
   rank() over(partition by user_id order by dt) rk
from
   user_age
group by
   dt,user_id)t1)t2
group by
   user_id,flag
having
   count(*)>=2)t3)t4)t5;t8

-- 全汇总
/*
'所有用户的总和',    
'所有用户的平均年龄',
'活跃用户的总数',
 '活跃用户的平均年龄'*/
select 
    sum(sc) `所有用户的总和`,
    sum(sa) `所有用户的平均年龄`,
    sum(tc) `活跃用户的总数`,
    sum(ta) `活跃用户的平均年龄`
from 
    (select 
    sum_count sc,
    sum_age sa,
    0 tc,
    0 ta
from 
    (select
     count(*) sum_count,
     avg(age) sum_age
from 
     (select 
    distinct(user_id),
    age
from 
    user_age)t6)t7
union
select
    0 sc,
    0 sa,
    twice_count tc,
    twice_avg ta
from 
    (select 
    count(user_id) twice_count,
    avg(age) twice_avg
from
    (select 
    distinct(user_id) user_id,
    age 
from 
    (select
   user_id,
   min(age) age
from
   (select
   user_id,
   age,
   date_sub(dt,rk) flag
from
   (select
   dt,
   user_id,
   min(age) age,
   rank() over(partition by user_id order by dt) rk
from
   user_age
group by
   dt,user_id)t1)t2
group by
   user_id,flag
having
   count(*)>=2)t3)t4)t5)t8;


-- 全汇总方式二
select 
    t7.sc `所有用户的总和`,
    t7.sa `所有用户的平均年龄`,
    t5.tc `活跃用户的总数`,
    t5.ta `活跃用户的平均年龄`
from 
    (select 
    count(user_id) tc,
    avg(age) ta
from
    (select 
    distinct(user_id) user_id,
    age 
from 
    (select
   user_id,
   min(age) age
from
   (select
   user_id,
   age,
   date_sub(dt,rk) flag
from
   (select
   dt,
   user_id,
   min(age) age,
   rank() over(partition by user_id order by dt) rk
from
   user_age
group by
   dt,user_id)t1)t2
group by
   user_id,flag
having
   count(*)>=2)t3)t4)t5,
    (select
     count(*) sc,
     avg(age) sa
from 
     (select 
    distinct(user_id),
    age
from 
    user_age)t6)t7


-- 第六题:请用sql写出所有用户中在今年10月份第一次购买商品的金额,表ordertable字段(购买用户:userid,金额:money,购买时间:paymenttime(格式:2017-10-01),订单id:orderid)


-- 建表:
CREATE TABLE ordertable(userid string,money int,paymenttime string,orderid string)row format delimited fields terminated by 't'; 

-- 法一
-- 得到10月份订单表
select 
    userid,
    money,
    paymenttime,
    orderid
from ordertable
where date_format(paymenttime,'yyyy-MM') = date_format('2020-10','yyyy-MM');t1

-- 按用户分组,按时间排序
select 
   userid,
   money,
   paymenttime,
   rank() over(partition by userid order by paymenttime asc) pay_rank
from 
   t1;t2

-- 得到最终结果
select
   userid,
   money
from t2
where pay_rank =1;

-- 整合
select
   userid,
   money
from (select 
   userid,
   money,
   paymenttime,
   rank() over(partition by userid order by paymenttime asc) pay_rank
from 
   (select 
    userid,
    money,
    paymenttime,
    orderid
from ordertable
where date_format(paymenttime,'yyyy-MM') = date_format('2020-10','yyyy-MM'))t1)t2
where pay_rank =1;


-- 法二
-- 查出10月最早时间下单
select
   userid,
   min(paymenttime) paymenttime
from
   ordertable
where
   date_format(paymenttime,'yyyy-MM')='2020-10'
group by
   userid;t1
   
-- 关联原表得到金额
select
   t1.userid,
   t1.paymenttime,
   od.money
from
   t1
join
   ordertable od
on
   t1.userid=od.userid
   and
   t1.paymenttime=od.paymenttime;

--   整合
select
   t1.userid,
   t1.paymenttime,
   od.money
from
   (select
   userid,
   min(paymenttime) paymenttime
from
   ordertable
where
   date_format(paymenttime,'yyyy-MM')='2020-10'
group by
   userid)t1
join
   ordertable od
on
   t1.userid=od.userid
   and
   t1.paymenttime=od.paymenttime;


-- 第七题
/*
有一个线上服务器访问日志格式如下(用sql答题)
时间                    接口                         ip地址
2016-11-09 11:22:05    /api/user/login                  110.23.5.33
2016-11-09 11:23:10    /api/user/detail                  57.3.2.16
.....
2016-11-09 23:59:40    /api/user/login                  200.6.5.166
求11月9号下午14点(14-15点),访问api/user/login接口的top10的ip地址

*/
-- 数据集
/*
2016-11-09 14:22:05	/api/user/login	110.23.5.33
2016-11-09 11:23:10	/api/user/detail	57.3.2.16
2016-11-09 14:59:40	/api/user/login	200.6.5.166
2016-11-09 14:22:05	/api/user/login	110.23.5.34
2016-11-09 14:22:05	/api/user/login	110.23.5.34
2016-11-09 14:22:05	/api/user/login	110.23.5.34
2016-11-09 11:23:10	/api/user/detail	57.3.2.16
2016-11-09 23:59:40	/api/user/login	200.6.5.166
2016-11-09 14:22:05	/api/user/login	110.23.5.34
2016-11-09 11:23:10	/api/user/detail	57.3.2.16
2016-11-09 23:59:40	/api/user/login	200.6.5.166
2016-11-09 14:22:05	/api/user/login	110.23.5.35
2016-11-09 14:23:10	/api/user/detail	57.3.2.16
2016-11-09 23:59:40	/api/user/login	200.6.5.166
2016-11-09 14:59:40	/api/user/login	200.6.5.166
2016-11-09 14:59:40	/api/user/login	200.6.5.166


*/
-- 建表: 
create table ip(
    time string,
    interface string,
    ip string)
row format delimited fields terminated by 't';

-- 实现
select 
    ip,
    interface,
    count(*) sum_search
from ip
where date_format(time,'yyyy-MM-dd HH')>='2016-11-09 14'
      and 
      date_format(time,'yyyy-MM-dd HH')<='2016-11-09 15' 
      and
      interface = '/api/user/login'
group by ip,interface
order by sum_search desc
limit 10

最后

以上就是心灵美水杯为你收集整理的hql题及答案的全部内容,希望文章能够帮你解决hql题及答案所遇到的程序开发问题。

如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。

本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
点赞(60)

评论列表共有 0 条评论

立即
投稿
返回
顶部