概述
一点一点的成长
题目来源于网络,思路自己写的有用到hql语句,或者sql语句
-- 第一题:求:找出所有科目成绩都大于某一学科平均成绩的学生
/*
数据:1001 01 90
1001 02 90
1001 03 90
1002 01 85
1002 02 85
1002 03 70
1003 01 70
1003 02 70
1003 03 85
表结构:uid,subject_id,score
*/
--(1) 建表:
create table score(
uid string,
subject_id string,
score int)
row format delimited field terminated by 't';
-- 求平均成绩
select uid,score,avg(score) over(partition by subject_id) avg_score from score;t1
-- 求学生分数如果大于平局分数则flag=0 否则为1
select uid,if(score>avg_score,0,1) flag from t1;t2
-- 按照uid聚合,如果sum(flag)=0,则符合题意
select uid from t2 group by uid having sum(flag)=0;
--整合
select uid
from (
select uid,
if(score>avg_score,0,1) flag
from (
select uid,
score,avg(score) over(partition by subject_id) avg_score
from score)t1)t2
group by uid
having sum(flag)=0;
-- 第二题
/* 数据源
u01 2017/1/21 5
u02 2017/1/23 6
u03 2017/1/22 8
u04 2017/1/20 3
u01 2017/1/23 6
u01 2017/2/21 8
u02 2017/1/23 6
u01 2017/2/22 4
*/
-- 求每个用户每个月访问量,及累计量
/*要求使用SQL统计出每个用户每个月的累积访问次数,如下表所示:
用户id 月份 小计 累积
u01 2017-01 11 11
u01 2017-02 12 23
u02 2017-01 12 12
u03 2017-01 8 8
u04 2017-01 3 3
*/
-- 建表:
create table action
(userId string,
visitDate string,
visitCount int)
row format delimited fields terminated by "t";
-- 给日期划分格式
select
userId,
date_format(regexp_replace(visitDate,'/','-'),'yyyy-MM') mn,
visitCount
from action;t1
-- 计算单月访问
select userId,mn,sum(visitCount) mn_count from t1 group by userId,mn;t2
-- 计算累计访问
select
userId,
mn,
mn_count,
sum(mn_count) over(partition by userId order by mn)
from t2
-- 整合
select
userId,
mn,
mn_count,
sum(mn_count) over(partition by userId order by mn) sum_count
from (
select
userId,
mn,
sum(visitCount) mn_count
from (
select
userId,
date_format(regexp_replace(visitDate,'/','-'),'yyyy-MM') mn,
visitCount
from action
)t1
group by userId,mn
)t2
-- 方式二(使用sql语句)
-- 更改日期格式
select
userId,
date_format(regexp_replace(visitDate,'/','-'),'yyyy-MM') mn,
visitCount
from action;t1
-- 统计每个用户,每个月的访问量
select
userId,
mn,
sum(visitCount) mn_count
from t1
group by userId,mn;t2
-- 聚合两张表,并左边表的月份要大于等于右边表的,这样得到1对多关系
select t2.userId as 2_uid,t2.mn as 2_mn,t2.mn_count as 2_mn_count,
t3.userId as 3_uid ,t3.mn as 3_mn ,t3.mn_count as 3_mn_count
from t2
join t3
on t2.userId = t3.userId
where t2.mn>=t3.mn;t4
-- 在按照t2.mn分组就可以得到累积值了
select
2_uid,
2_mn,
2_mn_count,
sum(3_mn_count) as sum_visite
from t4
group by 2_uid,2_mn,2_mn_count
-- 整合
select
2_uid,
2_mn,
2_mn_count,
sum(3_mn_count) as sum_visite
from (select t2.userId as 2_uid,t2.mn as 2_mn,t2.mn_count as 2_mn_count,
t3.userId as 3_uid ,t3.mn as 3_mn ,t3.mn_count as 3_mn_count
from (select
userId,
mn,
sum(visitCount) mn_count
from (select
userId,
date_format(regexp_replace(visitDate,'/','-'),'yyyy-MM') mn,
visitCount
from action)t1
group by userId,mn)t2
join (select
userId,
mn,
sum(visitCount) mn_count
from (select
userId,
date_format(regexp_replace(visitDate,'/','-'),'yyyy-MM') mn,
visitCount
from action)t1
group by userId,mn)t3
on t2.userId = t3.userId
where t2.mn>=t3.mn)t4
group by 2_uid,2_mn,2_mn_count
-- 第三题
-- 第四题
-- 第五题
/*
有日志如下,请写出代码求得所有用户和活跃用户的总数及平均年龄。(活跃用户指连续两天都有访问记录的用户)日期 用户 年龄
数据集
2019-02-11,test_1,23
2019-02-11,test_2,19
2019-02-11,test_3,39
2019-02-11,test_1,23
2019-02-11,test_3,39
2019-02-11,test_1,23
2019-02-12,test_2,19
2019-02-13,test_1,23
2019-02-15,test_2,19
2019-02-16,test_2,19
*/
-- 建表:
create table user_age(dt string,user_id string,age int)row format delimited fields terminated by ',';
-- 求日活思路,1、按用户id与日期排名,2、在用行中的日期减排名,得到的值如果有两条及以上相同的说明是连续登录,例如以下数据进行日期减排名
2019-02-11 test_1 23 1
2019-02-13 test_1 23 2
2019-02-11 test_2 19 1
2019-02-12 test_2 19 2
2019-02-15 test_2 19 3
2019-02-16 test_2 19 4
2019-02-11 test_3 39 1
-- 按照日期以及用户分组,按照日期排序并给出排名
select
dt,
user_id,
min(age) age,
rank() over(partition by user_id order by dt) rk
from
user_age
group by
dt,user_id;t1
-- 计算日期及排名的差值
select
user_id,
age,
date_sub(dt,rk) flag
from
t1;t2
-- 过滤出差值大于等于2的,即为连续两天活跃的用户
select
user_id,
min(age) age
from
t2
group by
user_id,flag
having
count(*)>=2;t3
-- 1、求活跃用户,整合
select
user_id,
min(age) age
from
(select
user_id,
age,
date_sub(dt,rk) flag
from
(select
dt,
user_id,
min(age) age,
rank() over(partition by user_id order by dt) rk
from
user_age
group by
dt,user_id)t1)t2
group by
user_id,flag
having
count(*)>=2;
-- 对日活进行去重(例如:a用户在1月10号1月11号以及1月20号和1月21号4天登录。)
select
distinct(user_id) user_id,
age
from
(select
user_id,
min(age) age
from
(select
user_id,
age,
date_sub(dt,rk) flag
from
(select
dt,
user_id,
min(age) age,
rank() over(partition by user_id order by dt) rk
from
user_age
group by
dt,user_id)t1)t2
group by
user_id,flag
having
count(*)>=2)t3;t4
-- 计算活跃用户(两天连续有访问)的人数以及平均年龄
select
count(user_id) twice_count,
avg(age) twice_avg
from
(select
distinct(user_id) user_id,
age
from
(select
user_id,
min(age) age
from
(select
user_id,
age,
date_sub(dt,rk) flag
from
(select
dt,
user_id,
min(age) age,
rank() over(partition by user_id order by dt) rk
from
user_age
group by
dt,user_id)t1)t2
group by
user_id,flag
having
count(*)>=2)t3)t4;t5
-- 求所有用户的总数,以及平均年龄
-- 对用户进行去重
select
distinct(user_id),
age
from
user_age;t6
-- 求总数,以及平均年龄
select
count(*),
avg(age)
from
t6;t7
-- 整合
select
count(*) sum_count,
avg(age) sum_age
from
(select
distinct(user_id),
age
from
user_age)t6;t7
-- 大整合
--所有用户和活跃用户的总数及平均年龄
-- union 两个表
select
sum_count,
sum_age,
0,
0
from
(select
count(*) sum_count,
avg(age) sum_age
from
(select
distinct(user_id),
age
from
user_age)t6)t7
union
select
0,
0,
twice_count,
twice_avg
from
(select
count(user_id) twice_count,
avg(age) twice_avg
from
(select
distinct(user_id) user_id,
age
from
(select
user_id,
min(age) age
from
(select
user_id,
age,
date_sub(dt,rk) flag
from
(select
dt,
user_id,
min(age) age,
rank() over(partition by user_id order by dt) rk
from
user_age
group by
dt,user_id)t1)t2
group by
user_id,flag
having
count(*)>=2)t3)t4)t5;t8
-- 全汇总
/*
'所有用户的总和',
'所有用户的平均年龄',
'活跃用户的总数',
'活跃用户的平均年龄'*/
select
sum(sc) `所有用户的总和`,
sum(sa) `所有用户的平均年龄`,
sum(tc) `活跃用户的总数`,
sum(ta) `活跃用户的平均年龄`
from
(select
sum_count sc,
sum_age sa,
0 tc,
0 ta
from
(select
count(*) sum_count,
avg(age) sum_age
from
(select
distinct(user_id),
age
from
user_age)t6)t7
union
select
0 sc,
0 sa,
twice_count tc,
twice_avg ta
from
(select
count(user_id) twice_count,
avg(age) twice_avg
from
(select
distinct(user_id) user_id,
age
from
(select
user_id,
min(age) age
from
(select
user_id,
age,
date_sub(dt,rk) flag
from
(select
dt,
user_id,
min(age) age,
rank() over(partition by user_id order by dt) rk
from
user_age
group by
dt,user_id)t1)t2
group by
user_id,flag
having
count(*)>=2)t3)t4)t5)t8;
-- 全汇总方式二
select
t7.sc `所有用户的总和`,
t7.sa `所有用户的平均年龄`,
t5.tc `活跃用户的总数`,
t5.ta `活跃用户的平均年龄`
from
(select
count(user_id) tc,
avg(age) ta
from
(select
distinct(user_id) user_id,
age
from
(select
user_id,
min(age) age
from
(select
user_id,
age,
date_sub(dt,rk) flag
from
(select
dt,
user_id,
min(age) age,
rank() over(partition by user_id order by dt) rk
from
user_age
group by
dt,user_id)t1)t2
group by
user_id,flag
having
count(*)>=2)t3)t4)t5,
(select
count(*) sc,
avg(age) sa
from
(select
distinct(user_id),
age
from
user_age)t6)t7
-- 第六题:请用sql写出所有用户中在今年10月份第一次购买商品的金额,表ordertable字段(购买用户:userid,金额:money,购买时间:paymenttime(格式:2017-10-01),订单id:orderid)
-- 建表:
CREATE TABLE ordertable(userid string,money int,paymenttime string,orderid string)row format delimited fields terminated by 't';
-- 法一
-- 得到10月份订单表
select
userid,
money,
paymenttime,
orderid
from ordertable
where date_format(paymenttime,'yyyy-MM') = date_format('2020-10','yyyy-MM');t1
-- 按用户分组,按时间排序
select
userid,
money,
paymenttime,
rank() over(partition by userid order by paymenttime asc) pay_rank
from
t1;t2
-- 得到最终结果
select
userid,
money
from t2
where pay_rank =1;
-- 整合
select
userid,
money
from (select
userid,
money,
paymenttime,
rank() over(partition by userid order by paymenttime asc) pay_rank
from
(select
userid,
money,
paymenttime,
orderid
from ordertable
where date_format(paymenttime,'yyyy-MM') = date_format('2020-10','yyyy-MM'))t1)t2
where pay_rank =1;
-- 法二
-- 查出10月最早时间下单
select
userid,
min(paymenttime) paymenttime
from
ordertable
where
date_format(paymenttime,'yyyy-MM')='2020-10'
group by
userid;t1
-- 关联原表得到金额
select
t1.userid,
t1.paymenttime,
od.money
from
t1
join
ordertable od
on
t1.userid=od.userid
and
t1.paymenttime=od.paymenttime;
-- 整合
select
t1.userid,
t1.paymenttime,
od.money
from
(select
userid,
min(paymenttime) paymenttime
from
ordertable
where
date_format(paymenttime,'yyyy-MM')='2020-10'
group by
userid)t1
join
ordertable od
on
t1.userid=od.userid
and
t1.paymenttime=od.paymenttime;
-- 第七题
/*
有一个线上服务器访问日志格式如下(用sql答题)
时间 接口 ip地址
2016-11-09 11:22:05 /api/user/login 110.23.5.33
2016-11-09 11:23:10 /api/user/detail 57.3.2.16
.....
2016-11-09 23:59:40 /api/user/login 200.6.5.166
求11月9号下午14点(14-15点),访问api/user/login接口的top10的ip地址
*/
-- 数据集
/*
2016-11-09 14:22:05 /api/user/login 110.23.5.33
2016-11-09 11:23:10 /api/user/detail 57.3.2.16
2016-11-09 14:59:40 /api/user/login 200.6.5.166
2016-11-09 14:22:05 /api/user/login 110.23.5.34
2016-11-09 14:22:05 /api/user/login 110.23.5.34
2016-11-09 14:22:05 /api/user/login 110.23.5.34
2016-11-09 11:23:10 /api/user/detail 57.3.2.16
2016-11-09 23:59:40 /api/user/login 200.6.5.166
2016-11-09 14:22:05 /api/user/login 110.23.5.34
2016-11-09 11:23:10 /api/user/detail 57.3.2.16
2016-11-09 23:59:40 /api/user/login 200.6.5.166
2016-11-09 14:22:05 /api/user/login 110.23.5.35
2016-11-09 14:23:10 /api/user/detail 57.3.2.16
2016-11-09 23:59:40 /api/user/login 200.6.5.166
2016-11-09 14:59:40 /api/user/login 200.6.5.166
2016-11-09 14:59:40 /api/user/login 200.6.5.166
*/
-- 建表:
create table ip(
time string,
interface string,
ip string)
row format delimited fields terminated by 't';
-- 实现
select
ip,
interface,
count(*) sum_search
from ip
where date_format(time,'yyyy-MM-dd HH')>='2016-11-09 14'
and
date_format(time,'yyyy-MM-dd HH')<='2016-11-09 15'
and
interface = '/api/user/login'
group by ip,interface
order by sum_search desc
limit 10
最后
以上就是心灵美水杯为你收集整理的hql题及答案的全部内容,希望文章能够帮你解决hql题及答案所遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复