概述
创建dw层的数据
##为新增用户、总用户、活跃用户做计算:
create table if not exists dwd_user(
pl string,
pl_id string,
en string,
en_id string,
browser_name string,
browser_id string,
browser_version string,
browser_version_id string,
province_name string,
province_id string,
city_name string,
city_id string,
uid string
)
partitioned by (month string,day string)
row format delimited fields terminated by 'u0001'
stored as orc
;
导入数据:
from (
select
ol.pl,
dp.id as pl_id,
ol.en,
de.id as en_id,
ol.browser_name,
db.id browser_id,
ol.browser_version ,
dv.id browser_version_id,
ol.province ,
pv.id province_id,
ol.city ,
dc.id city_id,
ol.u_ud
from ods.ods_logs_orc ol
left join dim.dim_platform dp on dp.platform_name = ol.pl
left join dim.dim_event_name de on de.name = ol.en
left join dim.dim_browser_name db on db.browser_name = ol.browser_name
left join dim.dim_browser_version dv on dv.browser_version = ol.browser_version
left join dim.dim_province pv on pv.province = ol.province
left join dim.dim_city dc on dc.city = ol.city
) tmp
insert into dwd_user partition (month='1',day = '01')
select *
;
-------------------------------
创建dm层的数据:
#用户主题下新增用户、新增总用户、日活跃用户
create table if not exists dm_user_users(
pl string,
pl_id string,
new_user_count int,
new_total_user_count int,
active_user_count int
)
partitioned by (month string,day string)
row format delimited fields terminated by 'u0001'
stored as orc
;
导入数据:
from (
select
us.pl,
us.pl_id,
count(distinct us.uid) as new_user_count,
count(distinct us.uid) + nvl(uu.new_total_user_count,0) as new_total_user_count,
0 as active_user_count
from dwd.dwd_user us
left join dm_user_users uu on uu.month = '1' and uu.day = '00' and us.pl_id = uu.pl_id
where us.month = 1 and us.day = '01' and us.en_id = 1
group by us.pl,us.pl_id,uu.new_total_user_count
union all select
us.pl,
us.pl_id,
0 as new_user_count,
0 as new_total_user_count,
count(distinct us.uid) as active_user_count
from dwd.dwd_user us
where us.month = 1 and us.day = '01'
group by us.pl,us.pl_id)tmp
insert into dm_user_users partition (month = 1,day ='01')
select tmp.pl,tmp.pl_id,
sum(tmp.new_user_count) new_user_count,
sum(tmp.new_total_user_count) new_total_user_count,
sum(tmp.active_user_count) active_user_count
group by tmp.pl,tmp.pl_id
;
注:dwd_user.en_id = 1 表示lunch事件
输出:
dm_user_users.pl dm_user_users.pl_id dm_user_users.new_user_count dm_user_users.new_total_user_count dm_user_users.active_user_count dm_user_users.month dm_user_users.day
java NULL 0 0 1 1 01
java_server 1 0 0 2 1 01
website 2 5 5 8 1 01
#浏览器模块下的新增用户、新增总用户、日活跃用户
create table if not exists dm_browser_users(
pl string,
pl_id string,
browser_name string,
browser_id string,
browser_version string,
browser_version_id string,
new_user_count int,
new_total_user_count int,
active_user_count int
)
partitioned by (month string,day string)
row format delimited fields terminated by 'u0001'
stored as orc
;
#分析:平台和浏览器有关,因为有平台相关字段
#browser_version_id和浏览器类型无关
from (
select
us.pl,
us.pl_id,
us.browser_name,
us.browser_id,
us.browser_version,
us.browser_version_id,
count(distinct us.uid) as new_user_count,
count(distinct us.uid) + nvl(bu.new_total_user_count,0) as new_total_user_count,
0 as active_user_count
from dwd.dwd_user us
left join dm_browser_users bu on bu.month = '1' and bu.day = '00' and bu.pl_id=us.pl_id and
bu.browser_id = us.browser_id and
bu.browser_version_id = us.browser_version_id
where us.month = 1 and us.day = '01' and us.en_id =1
group by us.pl,us.pl_id,us.browser_name,us.browser_id,us.browser_version,us.browser_version_id,bu.new_total_user_count
union all select
us.pl,
us.pl_id,
us.browser_name,
us.browser_id,
us.browser_version,
us.browser_version_id,
0 as new_user_count,
0 as new_total_user_count,
count(distinct us.uid) as active_user_count
from dwd.dwd_user us
where us.month = 1 and us.day = '01'
group by us.pl,us.pl_id,us.browser_name,us.browser_id,us.browser_version,us.browser_version_id)tmp
insert into dm_browser_users partition (month = 1,day ='01')
select
tmp.pl,
tmp.pl_id,
tmp.browser_name,
tmp.browser_id,
tmp.browser_version,
tmp.browser_version_id,
sum(tmp.new_user_count) new_user_count,
sum(tmp.new_total_user_count) new_total_user_count,
sum(tmp.active_user_count) active_user_count
group by tmp.pl,tmp.pl_id,tmp.browser_name,tmp.browser_id,tmp.browser_version,tmp.browser_version_id;
输出:
dm_browser_users.pl dm_browser_users.pl_id dm_browser_users.browser_name dm_browser_users.browser_id dm_browser_users.browser_version dm_browser_users.browser_version_id dm_browser_users.new_user_count dm_browser_us ers.new_total_user_count dm_browser_users.active_user_count dm_browser_users.month dm_browser_users.day
java NULL Chrome 4 31.0.1650.63 4 0 0 1 1 01
java_server 1 Chrome 4 31.0.1650.63 4 0 0 1 1 01
java_server 1 null 3 null 3 0 0 1 1 01
website 2 Chrome 4 31.0.1650.63 4 0 0 1 1 01
website 2 Chrome 4 47.0.2526.106 NULL 1 1 1 1 01
website 2 Chrome 4 70.0.3538.77 NULL 1 1 2 1 01
website 2 Firefox NULL 63.0 NULL 0 0 1 1 01
website 2 IE 1 8.0 1 2 2 2 1 01
website 2 Sogou Explorer 2 2.X 2 1 1 1 1 01
Time taken: 0.177 seconds, Fetched: 9 row(s)
#地域主题下的活跃用户
create table if not exists dm_area_users(
pl string,
pl_id string,
province string,
province_id string,
city string,
city_id string,
active_user_count int
)
partitioned by (month string,day string)
row format delimited fields terminated by 'u0001'
stored as orc
;
from (
select
us.pl,
us.pl_id,
us.province_name,
us.province_id,
us.city_name,
us.city_id,
count(distinct us.uid) as active_user
from dwd.dwd_user us
where us.month = 1 and us.day = '01'
group by us.pl,us.pl_id,us.province_name,us.province_id,us.city_name,us.city_id)tmp
insert into dm_area_users partition (month = 1,day ='01')
select
tmp.pl,
tmp.pl_id,
tmp.province_name,
tmp.province_id,
tmp.city_name,
tmp.city_id,
tmp.active_user
group by tmp.pl,tmp.pl_id,tmp.province_name,tmp.province_id,tmp.city_name,tmp.city_id,tmp.active_user;
输出:
hive (dm)> select * from dm_area_users;
OK
dm_area_users.pl dm_area_users.pl_id dm_area_users.province dm_area_users.province_id dm_area_users.city dm_area_users.city_id dm_area_users.active_user_count dm_area_users.month dm_area_users.day
java NULL 北京市 2002 昌平区 NULL 1 1 01
java_server 1 广西省 NULL 广西南宁市 NULL 1 1 01
java_server 1 贵州省 2025 贵阳市 4266 1 1 01
website 2 北京市 2002 昌平区 NULL 4 1 01
website 2 广西省 NULL 广西南宁市 NULL 1 1 01
website 2 河南省 2017 平顶山市 4109 1 1 01
website 2 贵州省 2025 贵阳市 4266 2 1 01
website 2 贵州省 2025 黔西南州兴义市 NULL 2 1 01
最后
以上就是洁净自行车为你收集整理的hive电商项目:2用户主题的全部内容,希望文章能够帮你解决hive电商项目:2用户主题所遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复