我是靠谱客的博主 舒适小虾米,最近开发中收集的这篇文章主要介绍【电商数仓】数仓搭建之数据主题(data warehouse topic-- DWT)层(DWT层之访客、优惠券、活动、地区主题)一 访客主题二 优惠券主题三 活动主题四 地区主题,觉得挺不错的,现在分享给大家,希望可以做个参考。

概述

文章目录

  • 一 访客主题
    • 1 建表语句
    • 2 数据导入
  • 二 优惠券主题
    • 1 建表语句
    • 2 数据导入
      • (1)首日导入
      • (2)每日导入
  • 三 活动主题
    • 1 建表语句
    • 2 数据导入
      • (1)首日导入
      • (2)每日导入
  • 四 地区主题
    • 1 建表语句
    • 2 数据导入
      • (1)首日导入
      • (2)每日导入

一 访客主题

1 建表语句

DROP TABLE IF EXISTS dwt_visitor_topic;
CREATE EXTERNAL TABLE dwt_visitor_topic
(
`mid_id` STRING COMMENT '设备id',
`brand` STRING COMMENT '手机品牌',
`model` STRING COMMENT '手机型号',
`channel` ARRAY<STRING> COMMENT '渠道',
`os` ARRAY<STRING> COMMENT '操作系统',
`area_code` ARRAY<STRING> COMMENT '地区ID',
`version_code` ARRAY<STRING> COMMENT '应用版本',
`visit_date_first` STRING
COMMENT '首次访问时间',
`visit_date_last` STRING
COMMENT '末次访问时间',
`visit_last_1d_count` BIGINT COMMENT '最近1日访问次数',
`visit_last_1d_day_count` BIGINT COMMENT '最近1日访问天数',
`visit_last_7d_count` BIGINT COMMENT '最近7日访问次数',
`visit_last_7d_day_count` BIGINT COMMENT '最近7日访问天数',
`visit_last_30d_count` BIGINT COMMENT '最近30日访问次数',
`visit_last_30d_day_count` BIGINT COMMENT '最近30日访问天数',
`visit_count` BIGINT COMMENT '累积访问次数',--用户行为日志历史数据无法获取,故该字段实为从数仓搭建日至今的累积值
`visit_day_count` BIGINT COMMENT '累积访问天数'--用户行为日志历史数据无法获取,故该字段实为从数仓搭建日至今的累积值
) COMMENT '设备主题宽表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwt/dwt_visitor_topic'
TBLPROPERTIES ("orc.compress"="snappy");

2 数据导入

insert overwrite table dwt_visitor_topic partition(dt='2020-06-14')
select
nvl(1d_ago.mid_id,old.mid_id),
nvl(1d_ago.brand,old.brand),
nvl(1d_ago.model,old.model),
nvl(1d_ago.channel,old.channel),
nvl(1d_ago.os,old.os),
nvl(1d_ago.area_code,old.area_code),
nvl(1d_ago.version_code,old.version_code),
case when old.mid_id is null and 1d_ago.is_new=1 then '2020-06-14'
when old.mid_id is null and 1d_ago.is_new=0 then '2020-06-13'--无法获取准确的首次登录日期,给定一个数仓搭建日之前的日期
else old.visit_date_first end,
if(1d_ago.mid_id is not null,'2020-06-14',old.visit_date_last),
nvl(1d_ago.visit_count,0),
if(1d_ago.mid_id is null,0,1),
nvl(old.visit_last_7d_count,0)+nvl(1d_ago.visit_count,0)- nvl(7d_ago.visit_count,0),
nvl(old.visit_last_7d_day_count,0)+if(1d_ago.mid_id is null,0,1)- if(7d_ago.mid_id is null,0,1),
nvl(old.visit_last_30d_count,0)+nvl(1d_ago.visit_count,0)- nvl(30d_ago.visit_count,0),
nvl(old.visit_last_30d_day_count,0)+if(1d_ago.mid_id is null,0,1)- if(30d_ago.mid_id is null,0,1),
nvl(old.visit_count,0)+nvl(1d_ago.visit_count,0),
nvl(old.visit_day_count,0)+if(1d_ago.mid_id is null,0,1)
from
(
select
mid_id,
brand,
model,
channel,
os,
area_code,
version_code,
visit_date_first,
visit_date_last,
visit_last_1d_count,
visit_last_1d_day_count,
visit_last_7d_count,
visit_last_7d_day_count,
visit_last_30d_count,
visit_last_30d_day_count,
visit_count,
visit_day_count
from dwt_visitor_topic
where dt=date_add('2020-06-14',-1)
)old
full outer join
(
select
mid_id,
brand,
model,
is_new,
channel,
os,
area_code,
version_code,
visit_count
from dws_visitor_action_daycount
where dt='2020-06-14'
)1d_ago
on old.mid_id=1d_ago.mid_id
left join
(
select
mid_id,
brand,
model,
is_new,
channel,
os,
area_code,
version_code,
visit_count
from dws_visitor_action_daycount
where dt=date_add('2020-06-14',-7)
)7d_ago
on old.mid_id=7d_ago.mid_id
left join
(
select
mid_id,
brand,
model,
is_new,
channel,
os,
area_code,
version_code,
visit_count
from dws_visitor_action_daycount
where dt=date_add('2020-06-14',-30)
)30d_ago
on old.mid_id=30d_ago.mid_id;

二 优惠券主题

1 建表语句

DROP TABLE IF EXISTS dwt_coupon_topic;
CREATE EXTERNAL TABLE dwt_coupon_topic(
`coupon_id` STRING COMMENT '优惠券ID',
`get_last_1d_count` BIGINT COMMENT '最近1日领取次数',
`get_last_7d_count` BIGINT COMMENT '最近7日领取次数',
`get_last_30d_count` BIGINT COMMENT '最近30日领取次数',
`get_count` BIGINT COMMENT '累积领取次数',
`order_last_1d_count` BIGINT COMMENT '最近1日使用某券下单次数',
`order_last_1d_reduce_amount` DECIMAL(16,2) COMMENT '最近1日使用某券下单优惠金额',
`order_last_1d_original_amount` DECIMAL(16,2) COMMENT '最近1日使用某券下单原始金额',
`order_last_1d_final_amount` DECIMAL(16,2) COMMENT '最近1日使用某券下单最终金额',
`order_last_7d_count` BIGINT COMMENT '最近7日使用某券下单次数',
`order_last_7d_reduce_amount` DECIMAL(16,2) COMMENT '最近7日使用某券下单优惠金额',
`order_last_7d_original_amount` DECIMAL(16,2) COMMENT '最近7日使用某券下单原始金额',
`order_last_7d_final_amount` DECIMAL(16,2) COMMENT '最近7日使用某券下单最终金额',
`order_last_30d_count` BIGINT COMMENT '最近30日使用某券下单次数',
`order_last_30d_reduce_amount` DECIMAL(16,2) COMMENT '最近30日使用某券下单优惠金额',
`order_last_30d_original_amount` DECIMAL(16,2) COMMENT '最近30日使用某券下单原始金额',
`order_last_30d_final_amount` DECIMAL(16,2) COMMENT '最近30日使用某券下单最终金额',
`order_count` BIGINT COMMENT '累积使用(下单)次数',
`order_reduce_amount` DECIMAL(16,2) COMMENT '使用某券累积下单优惠金额',
`order_original_amount` DECIMAL(16,2) COMMENT '使用某券累积下单原始金额',
`order_final_amount` DECIMAL(16,2) COMMENT '使用某券累积下单最终金额',
`payment_last_1d_count` BIGINT COMMENT '最近1日使用某券支付次数',
`payment_last_1d_reduce_amount` DECIMAL(16,2) COMMENT '最近1日使用某券优惠金额',
`payment_last_1d_amount` DECIMAL(16,2) COMMENT '最近1日使用某券支付金额',
`payment_last_7d_count` BIGINT COMMENT '最近7日使用某券支付次数',
`payment_last_7d_reduce_amount` DECIMAL(16,2) COMMENT '最近7日使用某券优惠金额',
`payment_last_7d_amount` DECIMAL(16,2) COMMENT '最近7日使用某券支付金额',
`payment_last_30d_count` BIGINT COMMENT '最近30日使用某券支付次数',
`payment_last_30d_reduce_amount` DECIMAL(16,2) COMMENT '最近30日使用某券优惠金额',
`payment_last_30d_amount` DECIMAL(16,2) COMMENT '最近30日使用某券支付金额',
`payment_count` BIGINT COMMENT '累积使用(支付)次数',
`payment_reduce_amount` DECIMAL(16,2) COMMENT '使用某券累积优惠金额',
`payment_amount` DECIMAL(16,2) COMMENT '使用某券累积支付金额',
`expire_last_1d_count` BIGINT COMMENT '最近1日过期次数',
`expire_last_7d_count` BIGINT COMMENT '最近7日过期次数',
`expire_last_30d_count` BIGINT COMMENT '最近30日过期次数',
`expire_count` BIGINT COMMENT '累积过期次数'
)comment '优惠券主题表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwt/dwt_coupon_topic/'
TBLPROPERTIES ("orc.compress"="snappy");

2 数据导入

(1)首日导入

insert overwrite table dwt_coupon_topic partition(dt='2020-06-14')
select
id,
nvl(get_last_1d_count,0),
nvl(get_last_7d_count,0),
nvl(get_last_30d_count,0),
nvl(get_count,0),
nvl(order_last_1d_count,0),
nvl(order_last_1d_reduce_amount,0),
nvl(order_last_1d_original_amount,0),
nvl(order_last_1d_final_amount,0),
nvl(order_last_7d_count,0),
nvl(order_last_7d_reduce_amount,0),
nvl(order_last_7d_original_amount,0),
nvl(order_last_7d_final_amount,0),
nvl(order_last_30d_count,0),
nvl(order_last_30d_reduce_amount,0),
nvl(order_last_30d_original_amount,0),
nvl(order_last_30d_final_amount,0),
nvl(order_count,0),
nvl(order_reduce_amount,0),
nvl(order_original_amount,0),
nvl(order_final_amount,0),
nvl(payment_last_1d_count,0),
nvl(payment_last_1d_reduce_amount,0),
nvl(payment_last_1d_amount,0),
nvl(payment_last_7d_count,0),
nvl(payment_last_7d_reduce_amount,0),
nvl(payment_last_7d_amount,0),
nvl(payment_last_30d_count,0),
nvl(payment_last_30d_reduce_amount,0),
nvl(payment_last_30d_amount,0),
nvl(payment_count,0),
nvl(payment_reduce_amount,0),
nvl(payment_amount,0),
nvl(expire_last_1d_count,0),
nvl(expire_last_7d_count,0),
nvl(expire_last_30d_count,0),
nvl(expire_count,0)
from
(
select
id
from dim_coupon_info
where dt='2020-06-14'
)t1
left join
(
select
coupon_id coupon_id,
sum(if(dt='2020-06-14',get_count,0)) get_last_1d_count,
sum(if(dt>=date_add('2020-06-14',-6),get_count,0)) get_last_7d_count,
sum(if(dt>=date_add('2020-06-14',-29),get_count,0)) get_last_30d_count,
sum(get_count) get_count,
sum(if(dt='2020-06-14',order_count,0)) order_last_1d_count,
sum(if(dt='2020-06-14',order_reduce_amount,0)) order_last_1d_reduce_amount,
sum(if(dt='2020-06-14',order_original_amount,0)) order_last_1d_original_amount,
sum(if(dt='2020-06-14',order_final_amount,0)) order_last_1d_final_amount,
sum(if(dt>=date_add('2020-06-14',-6),order_count,0)) order_last_7d_count,
sum(if(dt>=date_add('2020-06-14',-6),order_reduce_amount,0)) order_last_7d_reduce_amount,
sum(if(dt>=date_add('2020-06-14',-6),order_original_amount,0)) order_last_7d_original_amount,
sum(if(dt>=date_add('2020-06-14',-6),order_final_amount,0)) order_last_7d_final_amount,
sum(if(dt>=date_add('2020-06-14',-29),order_count,0)) order_last_30d_count,
sum(if(dt>=date_add('2020-06-14',-29),order_reduce_amount,0)) order_last_30d_reduce_amount,
sum(if(dt>=date_add('2020-06-14',-29),order_original_amount,0)) order_last_30d_original_amount,
sum(if(dt>=date_add('2020-06-14',-29),order_final_amount,0)) order_last_30d_final_amount,
sum(order_count) order_count,
sum(order_reduce_amount) order_reduce_amount,
sum(order_original_amount) order_original_amount,
sum(order_final_amount) order_final_amount,
sum(if(dt='2020-06-14',payment_count,0)) payment_last_1d_count,
sum(if(dt='2020-06-14',payment_reduce_amount,0)) payment_last_1d_reduce_amount,
sum(if(dt='2020-06-14',payment_amount,0)) payment_last_1d_amount,
sum(if(dt>=date_add('2020-06-14',-6),payment_count,0)) payment_last_7d_count,
sum(if(dt>=date_add('2020-06-14',-6),payment_reduce_amount,0)) payment_last_7d_reduce_amount,
sum(if(dt>=date_add('2020-06-14',-6),payment_amount,0)) payment_last_7d_amount,
sum(if(dt>=date_add('2020-06-14',-29),payment_count,0)) payment_last_30d_count,
sum(if(dt>=date_add('2020-06-14',-29),payment_reduce_amount,0)) payment_last_30d_reduce_amount,
sum(if(dt>=date_add('2020-06-14',-29),payment_amount,0)) payment_last_30d_amount,
sum(payment_count) payment_count,
sum(payment_reduce_amount) payment_reduce_amount,
sum(payment_amount) payment_amount,
sum(if(dt='2020-06-14',expire_count,0)) expire_last_1d_count,
sum(if(dt>=date_add('2020-06-14',-6),expire_count,0)) expire_last_7d_count,
sum(if(dt>=date_add('2020-06-14',-29),expire_count,0)) expire_last_30d_count,
sum(expire_count) expire_count
from dws_coupon_info_daycount
group by coupon_id
)t2
on t1.id=t2.coupon_id;

(2)每日导入

insert overwrite table dwt_coupon_topic partition(dt='2020-06-15')
select
nvl(1d_ago.coupon_id,old.coupon_id),
nvl(1d_ago.get_count,0),
nvl(old.get_last_7d_count,0)+nvl(1d_ago.get_count,0)- nvl(7d_ago.get_count,0),
nvl(old.get_last_30d_count,0)+nvl(1d_ago.get_count,0)- nvl(30d_ago.get_count,0),
nvl(old.get_count,0)+nvl(1d_ago.get_count,0),
nvl(1d_ago.order_count,0),
nvl(1d_ago.order_reduce_amount,0.0),
nvl(1d_ago.order_original_amount,0.0),
nvl(1d_ago.order_final_amount,0.0),
nvl(old.order_last_7d_count,0)+nvl(1d_ago.order_count,0)- nvl(7d_ago.order_count,0),
nvl(old.order_last_7d_reduce_amount,0.0)+nvl(1d_ago.order_reduce_amount,0.0)- nvl(7d_ago.order_reduce_amount,0.0),
nvl(old.order_last_7d_original_amount,0.0)+nvl(1d_ago.order_original_amount,0.0)- nvl(7d_ago.order_original_amount,0.0),
nvl(old.order_last_7d_final_amount,0.0)+nvl(1d_ago.order_final_amount,0.0)- nvl(7d_ago.order_final_amount,0.0),
nvl(old.order_last_30d_count,0)+nvl(1d_ago.order_count,0)- nvl(30d_ago.order_count,0),
nvl(old.order_last_30d_reduce_amount,0.0)+nvl(1d_ago.order_reduce_amount,0.0)- nvl(30d_ago.order_reduce_amount,0.0),
nvl(old.order_last_30d_original_amount,0.0)+nvl(1d_ago.order_original_amount,0.0)- nvl(30d_ago.order_original_amount,0.0),
nvl(old.order_last_30d_final_amount,0.0)+nvl(1d_ago.order_final_amount,0.0)- nvl(30d_ago.order_final_amount,0.0),
nvl(old.order_count,0)+nvl(1d_ago.order_count,0),
nvl(old.order_reduce_amount,0.0)+nvl(1d_ago.order_reduce_amount,0.0),
nvl(old.order_original_amount,0.0)+nvl(1d_ago.order_original_amount,0.0),
nvl(old.order_final_amount,0.0)+nvl(1d_ago.order_final_amount,0.0),
nvl(old.payment_last_1d_count,0)+nvl(1d_ago.payment_count,0)- nvl(1d_ago.payment_count,0),
nvl(old.payment_last_1d_reduce_amount,0.0)+nvl(1d_ago.payment_reduce_amount,0.0)- nvl(1d_ago.payment_reduce_amount,0.0),
nvl(old.payment_last_1d_amount,0.0)+nvl(1d_ago.payment_amount,0.0)- nvl(1d_ago.payment_amount,0.0),
nvl(old.payment_last_7d_count,0)+nvl(1d_ago.payment_count,0)- nvl(7d_ago.payment_count,0),
nvl(old.payment_last_7d_reduce_amount,0.0)+nvl(1d_ago.payment_reduce_amount,0.0)- nvl(7d_ago.payment_reduce_amount,0.0),
nvl(old.payment_last_7d_amount,0.0)+nvl(1d_ago.payment_amount,0.0)- nvl(7d_ago.payment_amount,0.0),
nvl(old.payment_last_30d_count,0)+nvl(1d_ago.payment_count,0)- nvl(30d_ago.payment_count,0),
nvl(old.payment_last_30d_reduce_amount,0.0)+nvl(1d_ago.payment_reduce_amount,0.0)- nvl(30d_ago.payment_reduce_amount,0.0),
nvl(old.payment_last_30d_amount,0.0)+nvl(1d_ago.payment_amount,0.0)- nvl(30d_ago.payment_amount,0.0),
nvl(old.payment_count,0)+nvl(1d_ago.payment_count,0),
nvl(old.payment_reduce_amount,0.0)+nvl(1d_ago.payment_reduce_amount,0.0),
nvl(old.payment_amount,0.0)+nvl(1d_ago.payment_amount,0.0),
nvl(1d_ago.expire_count,0),
nvl(old.expire_last_7d_count,0)+nvl(1d_ago.expire_count,0)- nvl(7d_ago.expire_count,0),
nvl(old.expire_last_30d_count,0)+nvl(1d_ago.expire_count,0)- nvl(30d_ago.expire_count,0),
nvl(old.expire_count,0)+nvl(1d_ago.expire_count,0)
from
(
select
coupon_id,
get_last_1d_count,
get_last_7d_count,
get_last_30d_count,
get_count,
order_last_1d_count,
order_last_1d_reduce_amount,
order_last_1d_original_amount,
order_last_1d_final_amount,
order_last_7d_count,
order_last_7d_reduce_amount,
order_last_7d_original_amount,
order_last_7d_final_amount,
order_last_30d_count,
order_last_30d_reduce_amount,
order_last_30d_original_amount,
order_last_30d_final_amount,
order_count,
order_reduce_amount,
order_original_amount,
order_final_amount,
payment_last_1d_count,
payment_last_1d_reduce_amount,
payment_last_1d_amount,
payment_last_7d_count,
payment_last_7d_reduce_amount,
payment_last_7d_amount,
payment_last_30d_count,
payment_last_30d_reduce_amount,
payment_last_30d_amount,
payment_count,
payment_reduce_amount,
payment_amount,
expire_last_1d_count,
expire_last_7d_count,
expire_last_30d_count,
expire_count
from dwt_coupon_topic
where dt=date_add('2020-06-15',-1)
)old
full outer join
(
select
coupon_id,
get_count,
order_count,
order_reduce_amount,
order_original_amount,
order_final_amount,
payment_count,
payment_reduce_amount,
payment_amount,
expire_count
from dws_coupon_info_daycount
where dt='2020-06-15'
)1d_ago
on old.coupon_id=1d_ago.coupon_id
left join
(
select
coupon_id,
get_count,
order_count,
order_reduce_amount,
order_original_amount,
order_final_amount,
payment_count,
payment_reduce_amount,
payment_amount,
expire_count
from dws_coupon_info_daycount
where dt=date_add('2020-06-15',-7)
)7d_ago
on old.coupon_id=7d_ago.coupon_id
left join
(
select
coupon_id,
get_count,
order_count,
order_reduce_amount,
order_original_amount,
order_final_amount,
payment_count,
payment_reduce_amount,
payment_amount,
expire_count
from dws_coupon_info_daycount
where dt=date_add('2020-06-15',-30)
)30d_ago
on old.coupon_id=30d_ago.coupon_id;

三 活动主题

1 建表语句

DROP TABLE IF EXISTS dwt_activity_topic;
CREATE EXTERNAL TABLE dwt_activity_topic(
`activity_rule_id` STRING COMMENT '活动规则ID',
`activity_id` STRING
COMMENT '活动ID',
`order_last_1d_count` BIGINT COMMENT '最近1日参与某活动某规则下单次数',
`order_last_1d_reduce_amount` DECIMAL(16,2) COMMENT '最近1日参与某活动某规则下单优惠金额',
`order_last_1d_original_amount` DECIMAL(16,2) COMMENT '最近1日参与某活动某规则下单原始金额',
`order_last_1d_final_amount` DECIMAL(16,2) COMMENT '最近1日参与某活动某规则下单最终金额',
`order_count` BIGINT COMMENT '参与某活动某规则累积下单次数',
`order_reduce_amount` DECIMAL(16,2) COMMENT '参与某活动某规则累积下单优惠金额',
`order_original_amount` DECIMAL(16,2) COMMENT '参与某活动某规则累积下单原始金额',
`order_final_amount` DECIMAL(16,2) COMMENT '参与某活动某规则累积下单最终金额',
`payment_last_1d_count` BIGINT COMMENT '最近1日参与某活动某规则支付次数',
`payment_last_1d_reduce_amount` DECIMAL(16,2) COMMENT '最近1日参与某活动某规则支付优惠金额',
`payment_last_1d_amount` DECIMAL(16,2) COMMENT '最近1日参与某活动某规则支付金额',
`payment_count` BIGINT COMMENT '参与某活动某规则累积支付次数',
`payment_reduce_amount` DECIMAL(16,2) COMMENT '参与某活动某规则累积支付优惠金额',
`payment_amount` DECIMAL(16,2) COMMENT '参与某活动某规则累积支付金额'
) COMMENT '活动主题宽表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwt/dwt_activity_topic/'
TBLPROPERTIES ("orc.compress"="snappy");

2 数据导入

(1)首日导入

insert overwrite table dwt_activity_topic partition(dt='2020-06-14')
select
t1.activity_rule_id,
t1.activity_id,
nvl(order_last_1d_count,0),
nvl(order_last_1d_reduce_amount,0),
nvl(order_last_1d_original_amount,0),
nvl(order_last_1d_final_amount,0),
nvl(order_count,0),
nvl(order_reduce_amount,0),
nvl(order_original_amount,0),
nvl(order_final_amount,0),
nvl(payment_last_1d_count,0),
nvl(payment_last_1d_reduce_amount,0),
nvl(payment_last_1d_amount,0),
nvl(payment_count,0),
nvl(payment_reduce_amount,0),
nvl(payment_amount,0)
from
(
select
activity_rule_id,
activity_id
from dim_activity_rule_info
where dt='2020-06-14'
)t1
left join
(
select
activity_rule_id,
activity_id,
sum(if(dt='2020-06-14',order_count,0)) order_last_1d_count,
sum(if(dt='2020-06-14',order_reduce_amount,0)) order_last_1d_reduce_amount,
sum(if(dt='2020-06-14',order_original_amount,0)) order_last_1d_original_amount,
sum(if(dt='2020-06-14',order_final_amount,0)) order_last_1d_final_amount,
sum(order_count) order_count,
sum(order_reduce_amount) order_reduce_amount,
sum(order_original_amount) order_original_amount,
sum(order_final_amount) order_final_amount,
sum(if(dt='2020-06-14',payment_count,0)) payment_last_1d_count,
sum(if(dt='2020-06-14',payment_reduce_amount,0)) payment_last_1d_reduce_amount,
sum(if(dt='2020-06-14',payment_amount,0)) payment_last_1d_amount,
sum(payment_count) payment_count,
sum(payment_reduce_amount) payment_reduce_amount,
sum(payment_amount) payment_amount
from dws_activity_info_daycount
group by activity_rule_id,activity_id
)t2
on t1.activity_rule_id=t2.activity_rule_id
and t1.activity_id=t2.activity_id;

(2)每日导入

insert overwrite table dwt_activity_topic partition(dt='2020-
insert overwrite table dwt_activity_topic partition(dt='2020-06-15')
select
nvl(1d_ago.activity_rule_id,old.activity_rule_id),
nvl(1d_ago.activity_id,old.activity_id),
nvl(1d_ago.order_count,0),
nvl(1d_ago.order_reduce_amount,0.0),
nvl(1d_ago.order_original_amount,0.0),
nvl(1d_ago.order_final_amount,0.0),
nvl(old.order_count,0)+nvl(1d_ago.order_count,0),
nvl(old.order_reduce_amount,0.0)+nvl(1d_ago.order_reduce_amount,0.0),
nvl(old.order_original_amount,0.0)+nvl(1d_ago.order_original_amount,0.0),
nvl(old.order_final_amount,0.0)+nvl(1d_ago.order_final_amount,0.0),
nvl(1d_ago.payment_count,0),
nvl(1d_ago.payment_reduce_amount,0.0),
nvl(1d_ago.payment_amount,0.0),
nvl(old.payment_count,0)+nvl(1d_ago.payment_count,0),
nvl(old.payment_reduce_amount,0.0)+nvl(1d_ago.payment_reduce_amount,0.0),
nvl(old.payment_amount,0.0)+nvl(1d_ago.payment_amount,0.0)
from
(
select
activity_rule_id,
activity_id,
order_count,
order_reduce_amount,
order_original_amount,
order_final_amount,
payment_count,
payment_reduce_amount,
payment_amount
from dwt_activity_topic
where dt=date_add('2020-06-15',-1)
)old
full outer join
(
select
activity_rule_id,
activity_id,
order_count,
order_reduce_amount,
order_original_amount,
order_final_amount,
payment_count,
payment_reduce_amount,
payment_amount
from dws_activity_info_daycount
where dt='2020-06-15'
)1d_ago
on old.activity_rule_id=1d_ago.activity_rule_id;

四 地区主题

1 建表语句

DROP TABLE IF EXISTS dwt_area_topic;
CREATE EXTERNAL TABLE dwt_area_topic(
`province_id` STRING COMMENT '编号',
`visit_last_1d_count` BIGINT COMMENT '最近1日访客访问次数',
`login_last_1d_count` BIGINT COMMENT '最近1日用户访问次数',
`visit_last_7d_count` BIGINT COMMENT '最近7访客访问次数',
`login_last_7d_count` BIGINT COMMENT '最近7日用户访问次数',
`visit_last_30d_count` BIGINT COMMENT '最近30日访客访问次数',
`login_last_30d_count` BIGINT COMMENT '最近30日用户访问次数',
`visit_count` BIGINT COMMENT '累积访客访问次数',
`login_count` BIGINT COMMENT '累积用户访问次数',
`order_last_1d_count` BIGINT COMMENT '最近1天下单次数',
`order_last_1d_original_amount` DECIMAL(16,2) COMMENT '最近1天下单原始金额',
`order_last_1d_final_amount` DECIMAL(16,2) COMMENT '最近1天下单最终金额',
`order_last_7d_count` BIGINT COMMENT '最近7天下单次数',
`order_last_7d_original_amount` DECIMAL(16,2) COMMENT '最近7天下单原始金额',
`order_last_7d_final_amount` DECIMAL(16,2) COMMENT '最近7天下单最终金额',
`order_last_30d_count` BIGINT COMMENT '最近30天下单次数',
`order_last_30d_original_amount` DECIMAL(16,2) COMMENT '最近30天下单原始金额',
`order_last_30d_final_amount` DECIMAL(16,2) COMMENT '最近30天下单最终金额',
`order_count` BIGINT COMMENT '累积下单次数',
`order_original_amount` DECIMAL(16,2) COMMENT '累积下单原始金额',
`order_final_amount` DECIMAL(16,2) COMMENT '累积下单最终金额',
`payment_last_1d_count` BIGINT COMMENT '最近1天支付次数',
`payment_last_1d_amount` DECIMAL(16,2) COMMENT '最近1天支付金额',
`payment_last_7d_count` BIGINT COMMENT '最近7天支付次数',
`payment_last_7d_amount` DECIMAL(16,2) COMMENT '最近7天支付金额',
`payment_last_30d_count` BIGINT COMMENT '最近30天支付次数',
`payment_last_30d_amount` DECIMAL(16,2) COMMENT '最近30天支付金额',
`payment_count` BIGINT COMMENT '累积支付次数',
`payment_amount` DECIMAL(16,2) COMMENT '累积支付金额',
`refund_order_last_1d_count` BIGINT COMMENT '最近1天退单次数',
`refund_order_last_1d_amount` DECIMAL(16,2) COMMENT '最近1天退单金额',
`refund_order_last_7d_count` BIGINT COMMENT '最近7天退单次数',
`refund_order_last_7d_amount` DECIMAL(16,2) COMMENT '最近7天退单金额',
`refund_order_last_30d_count` BIGINT COMMENT '最近30天退单次数',
`refund_order_last_30d_amount` DECIMAL(16,2) COMMENT '最近30天退单金额',
`refund_order_count` BIGINT COMMENT '累积退单次数',
`refund_order_amount` DECIMAL(16,2) COMMENT '累积退单金额',
`refund_payment_last_1d_count` BIGINT COMMENT '最近1天退款次数',
`refund_payment_last_1d_amount` DECIMAL(16,2) COMMENT '最近1天退款金额',
`refund_payment_last_7d_count` BIGINT COMMENT '最近7天退款次数',
`refund_payment_last_7d_amount` DECIMAL(16,2) COMMENT '最近7天退款金额',
`refund_payment_last_30d_count` BIGINT COMMENT '最近30天退款次数',
`refund_payment_last_30d_amount` DECIMAL(16,2) COMMENT '最近30天退款金额',
`refund_payment_count` BIGINT COMMENT '累积退款次数',
`refund_payment_amount` DECIMAL(16,2) COMMENT '累积退款金额'
) COMMENT '地区主题宽表'
PARTITIONED BY (`dt` STRING)
STORED AS ORC
LOCATION '/warehouse/gmall/dwt/dwt_area_topic/'
TBLPROPERTIES ("orc.compress"="snappy");

2 数据导入

(1)首日导入

insert overwrite table dwt_area_topic partition(dt='2020-06-14')
select
id,
nvl(visit_last_1d_count,0),
nvl(login_last_1d_count,0),
nvl(visit_last_7d_count,0),
nvl(login_last_7d_count,0),
nvl(visit_last_30d_count,0),
nvl(login_last_30d_count,0),
nvl(visit_count,0),
nvl(login_count,0),
nvl(order_last_1d_count,0),
nvl(order_last_1d_original_amount,0),
nvl(order_last_1d_final_amount,0),
nvl(order_last_7d_count,0),
nvl(order_last_7d_original_amount,0),
nvl(order_last_7d_final_amount,0),
nvl(order_last_30d_count,0),
nvl(order_last_30d_original_amount,0),
nvl(order_last_30d_final_amount,0),
nvl(order_count,0),
nvl(order_original_amount,0),
nvl(order_final_amount,0),
nvl(payment_last_1d_count,0),
nvl(payment_last_1d_amount,0),
nvl(payment_last_7d_count,0),
nvl(payment_last_7d_amount,0),
nvl(payment_last_30d_count,0),
nvl(payment_last_30d_amount,0),
nvl(payment_count,0),
nvl(payment_amount,0),
nvl(refund_order_last_1d_count,0),
nvl(refund_order_last_1d_amount,0),
nvl(refund_order_last_7d_count,0),
nvl(refund_order_last_7d_amount,0),
nvl(refund_order_last_30d_count,0),
nvl(refund_order_last_30d_amount,0),
nvl(refund_order_count,0),
nvl(refund_order_amount,0),
nvl(refund_payment_last_1d_count,0),
nvl(refund_payment_last_1d_amount,0),
nvl(refund_payment_last_7d_count,0),
nvl(refund_payment_last_7d_amount,0),
nvl(refund_payment_last_30d_count,0),
nvl(refund_payment_last_30d_amount,0),
nvl(refund_payment_count,0),
nvl(refund_payment_amount,0)
from
(
select
id
from dim_base_province
)t1
left join
(
select
province_id province_id,
sum(if(dt='2020-06-14',visit_count,0)) visit_last_1d_count,
sum(if(dt='2020-06-14',login_count,0)) login_last_1d_count,
sum(if(dt>=date_add('2020-06-14',-6),visit_count,0)) visit_last_7d_count,
sum(if(dt>=date_add('2020-06-14',-6),login_count,0)) login_last_7d_count,
sum(if(dt>=date_add('2020-06-14',-29),visit_count,0)) visit_last_30d_count,
sum(if(dt>=date_add('2020-06-14',-29),login_count,0)) login_last_30d_count,
sum(visit_count) visit_count,
sum(login_count) login_count,
sum(if(dt='2020-06-14',order_count,0)) order_last_1d_count,
sum(if(dt='2020-06-14',order_original_amount,0)) order_last_1d_original_amount,
sum(if(dt='2020-06-14',order_final_amount,0)) order_last_1d_final_amount,
sum(if(dt>=date_add('2020-06-14',-6),order_count,0)) order_last_7d_count,
sum(if(dt>=date_add('2020-06-14',-6),order_original_amount,0)) order_last_7d_original_amount,
sum(if(dt>=date_add('2020-06-14',-6),order_final_amount,0)) order_last_7d_final_amount,
sum(if(dt>=date_add('2020-06-14',-29),order_count,0)) order_last_30d_count,
sum(if(dt>=date_add('2020-06-14',-29),order_original_amount,0)) order_last_30d_original_amount,
sum(if(dt>=date_add('2020-06-14',-29),order_final_amount,0)) order_last_30d_final_amount,
sum(order_count) order_count,
sum(order_original_amount) order_original_amount,
sum(order_final_amount) order_final_amount,
sum(if(dt='2020-06-14',payment_count,0)) payment_last_1d_count,
sum(if(dt='2020-06-14',payment_amount,0)) payment_last_1d_amount,
sum(if(dt>=date_add('2020-06-14',-6),payment_count,0)) payment_last_7d_count,
sum(if(dt>=date_add('2020-06-14',-6),payment_amount,0)) payment_last_7d_amount,
sum(if(dt>=date_add('2020-06-14',-29),payment_count,0)) payment_last_30d_count,
sum(if(dt>=date_add('2020-06-14',-29),payment_amount,0)) payment_last_30d_amount,
sum(payment_count) payment_count,
sum(payment_amount) payment_amount,
sum(if(dt='2020-06-14',refund_order_count,0)) refund_order_last_1d_count,
sum(if(dt='2020-06-14',refund_order_amount,0)) refund_order_last_1d_amount,
sum(if(dt>=date_add('2020-06-14',-6),refund_order_count,0)) refund_order_last_7d_count,
sum(if(dt>=date_add('2020-06-14',-6),refund_order_amount,0)) refund_order_last_7d_amount,
sum(if(dt>=date_add('2020-06-14',-29),refund_order_count,0)) refund_order_last_30d_count,
sum(if(dt>=date_add('2020-06-14',-29),refund_order_amount,0)) refund_order_last_30d_amount,
sum(refund_order_count) refund_order_count,
sum(refund_order_amount) refund_order_amount,
sum(if(dt='2020-06-14',refund_payment_count,0)) refund_payment_last_1d_count,
sum(if(dt='2020-06-14',refund_payment_amount,0)) refund_payment_last_1d_amount,
sum(if(dt>=date_add('2020-06-14',-6),refund_payment_count,0)) refund_payment_last_7d_count,
sum(if(dt>=date_add('2020-06-14',-6),refund_payment_amount,0)) refund_payment_last_7d_amount,
sum(if(dt>=date_add('2020-06-14',-29),refund_payment_count,0)) refund_payment_last_30d_count,
sum(if(dt>=date_add('2020-06-14',-29),refund_payment_amount,0)) refund_payment_last_30d_amount,
sum(refund_payment_count) refund_payment_count,
sum(refund_payment_amount) refund_payment_amount
from dws_area_stats_daycount
group by province_id
)t2
on t1.id=t2.province_id;

(2)每日导入

insert overwrite table dwt_area_topic partition(dt='2020-06-15')
select
nvl(old.province_id, 1d_ago.province_id),
nvl(1d_ago.visit_count,0),
nvl(1d_ago.login_count,0),
nvl(old.visit_last_7d_count,0)+nvl(1d_ago.visit_count,0)- nvl(7d_ago.visit_count,0),
nvl(old.login_last_7d_count,0)+nvl(1d_ago.login_count,0)- nvl(7d_ago.login_count,0),
nvl(old.visit_last_30d_count,0)+nvl(1d_ago.visit_count,0)- nvl(30d_ago.visit_count,0),
nvl(old.login_last_30d_count,0)+nvl(1d_ago.login_count,0)- nvl(30d_ago.login_count,0),
nvl(old.visit_count,0)+nvl(1d_ago.visit_count,0),
nvl(old.login_count,0)+nvl(1d_ago.login_count,0),
nvl(1d_ago.order_count,0),
nvl(1d_ago.order_original_amount,0.0),
nvl(1d_ago.order_final_amount,0.0),
nvl(old.order_last_7d_count,0)+nvl(1d_ago.order_count,0)- nvl(7d_ago.order_count,0),
nvl(old.order_last_7d_original_amount,0.0)+nvl(1d_ago.order_original_amount,0.0)- nvl(7d_ago.order_original_amount,0.0),
nvl(old.order_last_7d_final_amount,0.0)+nvl(1d_ago.order_final_amount,0.0)- nvl(7d_ago.order_final_amount,0.0),
nvl(old.order_last_30d_count,0)+nvl(1d_ago.order_count,0)- nvl(30d_ago.order_count,0),
nvl(old.order_last_30d_original_amount,0.0)+nvl(1d_ago.order_original_amount,0.0)- nvl(30d_ago.order_original_amount,0.0),
nvl(old.order_last_30d_final_amount,0.0)+nvl(1d_ago.order_final_amount,0.0)- nvl(30d_ago.order_final_amount,0.0),
nvl(old.order_count,0)+nvl(1d_ago.order_count,0),
nvl(old.order_original_amount,0.0)+nvl(1d_ago.order_original_amount,0.0),
nvl(old.order_final_amount,0.0)+nvl(1d_ago.order_final_amount,0.0),
nvl(1d_ago.payment_count,0),
nvl(1d_ago.payment_amount,0.0),
nvl(old.payment_last_7d_count,0)+nvl(1d_ago.payment_count,0)- nvl(7d_ago.payment_count,0),
nvl(old.payment_last_7d_amount,0.0)+nvl(1d_ago.payment_amount,0.0)- nvl(7d_ago.payment_amount,0.0),
nvl(old.payment_last_30d_count,0)+nvl(1d_ago.payment_count,0)- nvl(30d_ago.payment_count,0),
nvl(old.payment_last_30d_amount,0.0)+nvl(1d_ago.payment_amount,0.0)- nvl(30d_ago.payment_amount,0.0),
nvl(old.payment_count,0)+nvl(1d_ago.payment_count,0),
nvl(old.payment_amount,0.0)+nvl(1d_ago.payment_amount,0.0),
nvl(1d_ago.refund_order_count,0),
nvl(1d_ago.refund_order_amount,0.0),
nvl(old.refund_order_last_7d_count,0)+nvl(1d_ago.refund_order_count,0)- nvl(7d_ago.refund_order_count,0),
nvl(old.refund_order_last_7d_amount,0.0)+nvl(1d_ago.refund_order_amount,0.0)- nvl(7d_ago.refund_order_amount,0.0),
nvl(old.refund_order_last_30d_count,0)+nvl(1d_ago.refund_order_count,0)- nvl(30d_ago.refund_order_count,0),
nvl(old.refund_order_last_30d_amount,0.0)+nvl(1d_ago.refund_order_amount,0.0)- nvl(30d_ago.refund_order_amount,0.0),
nvl(old.refund_order_count,0)+nvl(1d_ago.refund_order_count,0),
nvl(old.refund_order_amount,0.0)+nvl(1d_ago.refund_order_amount,0.0),
nvl(1d_ago.refund_payment_count,0),
nvl(1d_ago.refund_payment_amount,0.0),
nvl(old.refund_payment_last_7d_count,0)+nvl(1d_ago.refund_payment_count,0)- nvl(7d_ago.refund_payment_count,0),
nvl(old.refund_payment_last_7d_amount,0.0)+nvl(1d_ago.refund_payment_amount,0.0)- nvl(7d_ago.refund_payment_amount,0.0),
nvl(old.refund_payment_last_30d_count,0)+nvl(1d_ago.refund_payment_count,0)- nvl(30d_ago.refund_payment_count,0),
nvl(old.refund_payment_last_30d_amount,0.0)+nvl(1d_ago.refund_payment_amount,0.0)- nvl(30d_ago.refund_payment_amount,0.0),
nvl(old.refund_payment_count,0)+nvl(1d_ago.refund_payment_count,0),
nvl(old.refund_payment_amount,0.0)+nvl(1d_ago.refund_payment_amount,0.0)
from
(
select
province_id,
visit_last_1d_count,
login_last_1d_count,
visit_last_7d_count,
login_last_7d_count,
visit_last_30d_count,
login_last_30d_count,
visit_count,
login_count,
order_last_1d_count,
order_last_1d_original_amount,
order_last_1d_final_amount,
order_last_7d_count,
order_last_7d_original_amount,
order_last_7d_final_amount,
order_last_30d_count,
order_last_30d_original_amount,
order_last_30d_final_amount,
order_count,
order_original_amount,
order_final_amount,
payment_last_1d_count,
payment_last_1d_amount,
payment_last_7d_count,
payment_last_7d_amount,
payment_last_30d_count,
payment_last_30d_amount,
payment_count,
payment_amount,
refund_order_last_1d_count,
refund_order_last_1d_amount,
refund_order_last_7d_count,
refund_order_last_7d_amount,
refund_order_last_30d_count,
refund_order_last_30d_amount,
refund_order_count,
refund_order_amount,
refund_payment_last_1d_count,
refund_payment_last_1d_amount,
refund_payment_last_7d_count,
refund_payment_last_7d_amount,
refund_payment_last_30d_count,
refund_payment_last_30d_amount,
refund_payment_count,
refund_payment_amount
from dwt_area_topic
where dt=date_add('2020-06-15',-1)
)old
full outer join
(
select
province_id,
visit_count,
login_count,
order_count,
order_original_amount,
order_final_amount,
payment_count,
payment_amount,
refund_order_count,
refund_order_amount,
refund_payment_count,
refund_payment_amount
from dws_area_stats_daycount
where dt='2020-06-15'
)1d_ago
on old.province_id=1d_ago.province_id
left join
(
select
province_id,
visit_count,
login_count,
order_count,
order_original_amount,
order_final_amount,
payment_count,
payment_amount,
refund_order_count,
refund_order_amount,
refund_payment_count,
refund_payment_amount
from dws_area_stats_daycount
where dt=date_add('2020-06-15',-7)
)7d_ago
on old.province_id= 7d_ago.province_id
left join
(
select
province_id,
visit_count,
login_count,
order_count,
order_original_amount,
order_final_amount,
payment_count,
payment_amount,
refund_order_count,
refund_order_amount,
refund_payment_count,
refund_payment_amount
from dws_area_stats_daycount
where dt=date_add('2020-06-15',-30)
)30d_ago
on old.province_id= 30d_ago.province_id;

最后

以上就是舒适小虾米为你收集整理的【电商数仓】数仓搭建之数据主题(data warehouse topic-- DWT)层(DWT层之访客、优惠券、活动、地区主题)一 访客主题二 优惠券主题三 活动主题四 地区主题的全部内容,希望文章能够帮你解决【电商数仓】数仓搭建之数据主题(data warehouse topic-- DWT)层(DWT层之访客、优惠券、活动、地区主题)一 访客主题二 优惠券主题三 活动主题四 地区主题所遇到的程序开发问题。

如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。

本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
点赞(47)

评论列表共有 0 条评论

立即
投稿
返回
顶部