我是靠谱客的博主 洁净自行车,最近开发中收集的这篇文章主要介绍hive电商项目:2用户主题,觉得挺不错的,现在分享给大家,希望可以做个参考。

概述

创建dw层的数据
##为新增用户、总用户、活跃用户做计算:
create table if not exists dwd_user(
pl string,
pl_id string,
en string,
en_id string,
browser_name string,
browser_id string,
browser_version string,
browser_version_id string,
province_name string,
province_id string,
city_name string,
city_id string,
uid string
)
partitioned by (month string,day string)
row format delimited fields terminated by 'u0001'
stored as orc
;

导入数据:
from (
select
ol.pl,
dp.id as pl_id,
ol.en,
de.id as en_id,
ol.browser_name,
db.id browser_id,
ol.browser_version ,
dv.id browser_version_id,
ol.province ,
pv.id province_id,
ol.city ,
dc.id city_id,
ol.u_ud
from ods.ods_logs_orc ol
left join dim.dim_platform dp on dp.platform_name = ol.pl
left join dim.dim_event_name de on de.name = ol.en
left join dim.dim_browser_name db on db.browser_name = ol.browser_name
left join dim.dim_browser_version dv on dv.browser_version = ol.browser_version
left join dim.dim_province pv on pv.province = ol.province
left join dim.dim_city dc on dc.city = ol.city
) tmp
insert into dwd_user partition (month='1',day = '01')
select *
;

-------------------------------

创建dm层的数据:
#用户主题下新增用户、新增总用户、日活跃用户
create table if not exists dm_user_users(
pl string,
pl_id string,
new_user_count int,
new_total_user_count int,
active_user_count int
)
partitioned by (month string,day string)
row format delimited fields terminated by 'u0001'
stored as orc
;

导入数据:
from (
select 
us.pl,
us.pl_id,
count(distinct us.uid) as new_user_count,
count(distinct us.uid) + nvl(uu.new_total_user_count,0) as new_total_user_count,
0 as active_user_count
from dwd.dwd_user us
left join dm_user_users uu on uu.month = '1' and uu.day = '00' and us.pl_id = uu.pl_id
where us.month = 1 and us.day = '01' and us.en_id = 1
group by us.pl,us.pl_id,uu.new_total_user_count
union all select 
us.pl,
us.pl_id,
0 as new_user_count,
0 as new_total_user_count,
count(distinct us.uid) as active_user_count
from dwd.dwd_user us
where us.month = 1 and us.day = '01'
group by us.pl,us.pl_id)tmp

insert into dm_user_users partition (month = 1,day ='01')

select tmp.pl,tmp.pl_id,
sum(tmp.new_user_count) new_user_count,
sum(tmp.new_total_user_count) new_total_user_count,
sum(tmp.active_user_count) active_user_count

group by tmp.pl,tmp.pl_id
;
注:dwd_user.en_id = 1 表示lunch事件
输出:
dm_user_users.pl        dm_user_users.pl_id     dm_user_users.new_user_count    dm_user_users.new_total_user_count  dm_user_users.active_user_count  dm_user_users.month     dm_user_users.day
java    NULL    0       0       1       1       01
java_server     1       0       0       2       1       01
website 2       5       5       8       1       01


#浏览器模块下的新增用户、新增总用户、日活跃用户
create table if not exists dm_browser_users(
pl string,
pl_id string,
browser_name string,
browser_id string,
browser_version string,
browser_version_id string,
new_user_count int,
new_total_user_count int,
active_user_count int
)
partitioned by (month string,day string)
row format delimited fields terminated by 'u0001'
stored as orc
;
#分析:平台和浏览器有关,因为有平台相关字段
#browser_version_id和浏览器类型无关
from (
select 
us.pl,
us.pl_id,
us.browser_name,
us.browser_id,
us.browser_version,
us.browser_version_id,
count(distinct us.uid) as new_user_count,
count(distinct us.uid) + nvl(bu.new_total_user_count,0) as new_total_user_count,
0 as active_user_count
from dwd.dwd_user us
left join dm_browser_users bu on bu.month = '1' and bu.day = '00' and bu.pl_id=us.pl_id and 
bu.browser_id = us.browser_id and 
bu.browser_version_id = us.browser_version_id 
where us.month = 1 and us.day = '01' and us.en_id =1
group by us.pl,us.pl_id,us.browser_name,us.browser_id,us.browser_version,us.browser_version_id,bu.new_total_user_count
union all select 
us.pl,
us.pl_id,
us.browser_name,
us.browser_id,
us.browser_version,
us.browser_version_id,
0 as new_user_count,
0 as new_total_user_count,
count(distinct us.uid) as active_user_count
from dwd.dwd_user us
where us.month = 1 and us.day = '01'
group by us.pl,us.pl_id,us.browser_name,us.browser_id,us.browser_version,us.browser_version_id)tmp
insert into dm_browser_users partition (month = 1,day ='01')
select 
tmp.pl,
tmp.pl_id,
tmp.browser_name,
tmp.browser_id,
tmp.browser_version,
tmp.browser_version_id,
sum(tmp.new_user_count) new_user_count,
sum(tmp.new_total_user_count) new_total_user_count,
sum(tmp.active_user_count) active_user_count
group by tmp.pl,tmp.pl_id,tmp.browser_name,tmp.browser_id,tmp.browser_version,tmp.browser_version_id;
输出:
dm_browser_users.pl     dm_browser_users.pl_id  dm_browser_users.browser_name   dm_browser_users.browser_id     dm_browser_users.browser_version     dm_browser_users.browser_version_id     dm_browser_users.new_user_count dm_browser_us                                     ers.new_total_user_count        dm_browser_users.active_user_count      dm_browser_users.month  dm_browser_users.day
java    NULL    Chrome  4       31.0.1650.63    4       0       0       1       1       01
java_server     1       Chrome  4       31.0.1650.63    4       0       0       1       1       01
java_server     1       null    3       null    3       0       0       1       1       01
website 2       Chrome  4       31.0.1650.63    4       0       0       1       1       01
website 2       Chrome  4       47.0.2526.106   NULL    1       1       1       1       01
website 2       Chrome  4       70.0.3538.77    NULL    1       1       2       1       01
website 2       Firefox NULL    63.0    NULL    0       0       1       1       01
website 2       IE      1       8.0     1       2       2       2       1       01
website 2       Sogou Explorer  2       2.X     2       1       1       1       1       01
Time taken: 0.177 seconds, Fetched: 9 row(s)

#地域主题下的活跃用户
create table if not exists dm_area_users(
pl string,
pl_id string,
province string,
province_id string,
city string,
city_id string,
active_user_count int
)
partitioned by (month string,day string)
row format delimited fields terminated by 'u0001'
stored as orc
;
from (
select 
us.pl,
us.pl_id,
us.province_name,
us.province_id,
us.city_name,
us.city_id,
count(distinct us.uid) as active_user
from dwd.dwd_user us
where us.month = 1 and us.day = '01'
group by us.pl,us.pl_id,us.province_name,us.province_id,us.city_name,us.city_id)tmp
insert into dm_area_users partition (month = 1,day ='01')
select 
tmp.pl,
tmp.pl_id,
tmp.province_name,
tmp.province_id,
tmp.city_name,
tmp.city_id,
tmp.active_user
group by tmp.pl,tmp.pl_id,tmp.province_name,tmp.province_id,tmp.city_name,tmp.city_id,tmp.active_user;

输出:
hive (dm)> select * from dm_area_users;
OK
dm_area_users.pl        dm_area_users.pl_id     dm_area_users.province  dm_area_users.province_id       dm_area_users.city   dm_area_users.city_id   dm_area_users.active_user_count dm_area_users.month     dm_area_users.day
java    NULL    北京市  2002    昌平区  NULL    1       1       01
java_server     1       广西省  NULL    广西南宁市      NULL    1       1       01
java_server     1       贵州省  2025    贵阳市  4266    1       1       01
website 2       北京市  2002    昌平区  NULL    4       1       01
website 2       广西省  NULL    广西南宁市      NULL    1       1       01
website 2       河南省  2017    平顶山市        4109    1       1       01
website 2       贵州省  2025    贵阳市  4266    2       1       01
website 2       贵州省  2025    黔西南州兴义市  NULL    2       1       01

 

最后

以上就是洁净自行车为你收集整理的hive电商项目:2用户主题的全部内容,希望文章能够帮你解决hive电商项目:2用户主题所遇到的程序开发问题。

如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。

本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
点赞(44)

评论列表共有 0 条评论

立即
投稿
返回
顶部