我是靠谱客的博主 舒适小虾米,这篇文章主要介绍【电商数仓】数仓搭建之数据主题(data warehouse topic-- DWT)层(DWT层之访客、优惠券、活动、地区主题)一 访客主题二 优惠券主题三 活动主题四 地区主题,现在分享给大家,希望可以做个参考。

文章目录

  • 一 访客主题
    • 1 建表语句
    • 2 数据导入
  • 二 优惠券主题
    • 1 建表语句
    • 2 数据导入
      • (1)首日导入
      • (2)每日导入
  • 三 活动主题
    • 1 建表语句
    • 2 数据导入
      • (1)首日导入
      • (2)每日导入
  • 四 地区主题
    • 1 建表语句
    • 2 数据导入
      • (1)首日导入
      • (2)每日导入

一 访客主题

1 建表语句

复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
DROP TABLE IF EXISTS dwt_visitor_topic; CREATE EXTERNAL TABLE dwt_visitor_topic ( `mid_id` STRING COMMENT '设备id', `brand` STRING COMMENT '手机品牌', `model` STRING COMMENT '手机型号', `channel` ARRAY<STRING> COMMENT '渠道', `os` ARRAY<STRING> COMMENT '操作系统', `area_code` ARRAY<STRING> COMMENT '地区ID', `version_code` ARRAY<STRING> COMMENT '应用版本', `visit_date_first` STRING COMMENT '首次访问时间', `visit_date_last` STRING COMMENT '末次访问时间', `visit_last_1d_count` BIGINT COMMENT '最近1日访问次数', `visit_last_1d_day_count` BIGINT COMMENT '最近1日访问天数', `visit_last_7d_count` BIGINT COMMENT '最近7日访问次数', `visit_last_7d_day_count` BIGINT COMMENT '最近7日访问天数', `visit_last_30d_count` BIGINT COMMENT '最近30日访问次数', `visit_last_30d_day_count` BIGINT COMMENT '最近30日访问天数', `visit_count` BIGINT COMMENT '累积访问次数',--用户行为日志历史数据无法获取,故该字段实为从数仓搭建日至今的累积值 `visit_day_count` BIGINT COMMENT '累积访问天数'--用户行为日志历史数据无法获取,故该字段实为从数仓搭建日至今的累积值 ) COMMENT '设备主题宽表' PARTITIONED BY (`dt` STRING) STORED AS ORC LOCATION '/warehouse/gmall/dwt/dwt_visitor_topic' TBLPROPERTIES ("orc.compress"="snappy");

2 数据导入

复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
insert overwrite table dwt_visitor_topic partition(dt='2020-06-14') select nvl(1d_ago.mid_id,old.mid_id), nvl(1d_ago.brand,old.brand), nvl(1d_ago.model,old.model), nvl(1d_ago.channel,old.channel), nvl(1d_ago.os,old.os), nvl(1d_ago.area_code,old.area_code), nvl(1d_ago.version_code,old.version_code), case when old.mid_id is null and 1d_ago.is_new=1 then '2020-06-14' when old.mid_id is null and 1d_ago.is_new=0 then '2020-06-13'--无法获取准确的首次登录日期,给定一个数仓搭建日之前的日期 else old.visit_date_first end, if(1d_ago.mid_id is not null,'2020-06-14',old.visit_date_last), nvl(1d_ago.visit_count,0), if(1d_ago.mid_id is null,0,1), nvl(old.visit_last_7d_count,0)+nvl(1d_ago.visit_count,0)- nvl(7d_ago.visit_count,0), nvl(old.visit_last_7d_day_count,0)+if(1d_ago.mid_id is null,0,1)- if(7d_ago.mid_id is null,0,1), nvl(old.visit_last_30d_count,0)+nvl(1d_ago.visit_count,0)- nvl(30d_ago.visit_count,0), nvl(old.visit_last_30d_day_count,0)+if(1d_ago.mid_id is null,0,1)- if(30d_ago.mid_id is null,0,1), nvl(old.visit_count,0)+nvl(1d_ago.visit_count,0), nvl(old.visit_day_count,0)+if(1d_ago.mid_id is null,0,1) from ( select mid_id, brand, model, channel, os, area_code, version_code, visit_date_first, visit_date_last, visit_last_1d_count, visit_last_1d_day_count, visit_last_7d_count, visit_last_7d_day_count, visit_last_30d_count, visit_last_30d_day_count, visit_count, visit_day_count from dwt_visitor_topic where dt=date_add('2020-06-14',-1) )old full outer join ( select mid_id, brand, model, is_new, channel, os, area_code, version_code, visit_count from dws_visitor_action_daycount where dt='2020-06-14' )1d_ago on old.mid_id=1d_ago.mid_id left join ( select mid_id, brand, model, is_new, channel, os, area_code, version_code, visit_count from dws_visitor_action_daycount where dt=date_add('2020-06-14',-7) )7d_ago on old.mid_id=7d_ago.mid_id left join ( select mid_id, brand, model, is_new, channel, os, area_code, version_code, visit_count from dws_visitor_action_daycount where dt=date_add('2020-06-14',-30) )30d_ago on old.mid_id=30d_ago.mid_id;

二 优惠券主题

1 建表语句

复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
DROP TABLE IF EXISTS dwt_coupon_topic; CREATE EXTERNAL TABLE dwt_coupon_topic( `coupon_id` STRING COMMENT '优惠券ID', `get_last_1d_count` BIGINT COMMENT '最近1日领取次数', `get_last_7d_count` BIGINT COMMENT '最近7日领取次数', `get_last_30d_count` BIGINT COMMENT '最近30日领取次数', `get_count` BIGINT COMMENT '累积领取次数', `order_last_1d_count` BIGINT COMMENT '最近1日使用某券下单次数', `order_last_1d_reduce_amount` DECIMAL(16,2) COMMENT '最近1日使用某券下单优惠金额', `order_last_1d_original_amount` DECIMAL(16,2) COMMENT '最近1日使用某券下单原始金额', `order_last_1d_final_amount` DECIMAL(16,2) COMMENT '最近1日使用某券下单最终金额', `order_last_7d_count` BIGINT COMMENT '最近7日使用某券下单次数', `order_last_7d_reduce_amount` DECIMAL(16,2) COMMENT '最近7日使用某券下单优惠金额', `order_last_7d_original_amount` DECIMAL(16,2) COMMENT '最近7日使用某券下单原始金额', `order_last_7d_final_amount` DECIMAL(16,2) COMMENT '最近7日使用某券下单最终金额', `order_last_30d_count` BIGINT COMMENT '最近30日使用某券下单次数', `order_last_30d_reduce_amount` DECIMAL(16,2) COMMENT '最近30日使用某券下单优惠金额', `order_last_30d_original_amount` DECIMAL(16,2) COMMENT '最近30日使用某券下单原始金额', `order_last_30d_final_amount` DECIMAL(16,2) COMMENT '最近30日使用某券下单最终金额', `order_count` BIGINT COMMENT '累积使用(下单)次数', `order_reduce_amount` DECIMAL(16,2) COMMENT '使用某券累积下单优惠金额', `order_original_amount` DECIMAL(16,2) COMMENT '使用某券累积下单原始金额', `order_final_amount` DECIMAL(16,2) COMMENT '使用某券累积下单最终金额', `payment_last_1d_count` BIGINT COMMENT '最近1日使用某券支付次数', `payment_last_1d_reduce_amount` DECIMAL(16,2) COMMENT '最近1日使用某券优惠金额', `payment_last_1d_amount` DECIMAL(16,2) COMMENT '最近1日使用某券支付金额', `payment_last_7d_count` BIGINT COMMENT '最近7日使用某券支付次数', `payment_last_7d_reduce_amount` DECIMAL(16,2) COMMENT '最近7日使用某券优惠金额', `payment_last_7d_amount` DECIMAL(16,2) COMMENT '最近7日使用某券支付金额', `payment_last_30d_count` BIGINT COMMENT '最近30日使用某券支付次数', `payment_last_30d_reduce_amount` DECIMAL(16,2) COMMENT '最近30日使用某券优惠金额', `payment_last_30d_amount` DECIMAL(16,2) COMMENT '最近30日使用某券支付金额', `payment_count` BIGINT COMMENT '累积使用(支付)次数', `payment_reduce_amount` DECIMAL(16,2) COMMENT '使用某券累积优惠金额', `payment_amount` DECIMAL(16,2) COMMENT '使用某券累积支付金额', `expire_last_1d_count` BIGINT COMMENT '最近1日过期次数', `expire_last_7d_count` BIGINT COMMENT '最近7日过期次数', `expire_last_30d_count` BIGINT COMMENT '最近30日过期次数', `expire_count` BIGINT COMMENT '累积过期次数' )comment '优惠券主题表' PARTITIONED BY (`dt` STRING) STORED AS ORC LOCATION '/warehouse/gmall/dwt/dwt_coupon_topic/' TBLPROPERTIES ("orc.compress"="snappy");

2 数据导入

(1)首日导入

复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
insert overwrite table dwt_coupon_topic partition(dt='2020-06-14') select id, nvl(get_last_1d_count,0), nvl(get_last_7d_count,0), nvl(get_last_30d_count,0), nvl(get_count,0), nvl(order_last_1d_count,0), nvl(order_last_1d_reduce_amount,0), nvl(order_last_1d_original_amount,0), nvl(order_last_1d_final_amount,0), nvl(order_last_7d_count,0), nvl(order_last_7d_reduce_amount,0), nvl(order_last_7d_original_amount,0), nvl(order_last_7d_final_amount,0), nvl(order_last_30d_count,0), nvl(order_last_30d_reduce_amount,0), nvl(order_last_30d_original_amount,0), nvl(order_last_30d_final_amount,0), nvl(order_count,0), nvl(order_reduce_amount,0), nvl(order_original_amount,0), nvl(order_final_amount,0), nvl(payment_last_1d_count,0), nvl(payment_last_1d_reduce_amount,0), nvl(payment_last_1d_amount,0), nvl(payment_last_7d_count,0), nvl(payment_last_7d_reduce_amount,0), nvl(payment_last_7d_amount,0), nvl(payment_last_30d_count,0), nvl(payment_last_30d_reduce_amount,0), nvl(payment_last_30d_amount,0), nvl(payment_count,0), nvl(payment_reduce_amount,0), nvl(payment_amount,0), nvl(expire_last_1d_count,0), nvl(expire_last_7d_count,0), nvl(expire_last_30d_count,0), nvl(expire_count,0) from ( select id from dim_coupon_info where dt='2020-06-14' )t1 left join ( select coupon_id coupon_id, sum(if(dt='2020-06-14',get_count,0)) get_last_1d_count, sum(if(dt>=date_add('2020-06-14',-6),get_count,0)) get_last_7d_count, sum(if(dt>=date_add('2020-06-14',-29),get_count,0)) get_last_30d_count, sum(get_count) get_count, sum(if(dt='2020-06-14',order_count,0)) order_last_1d_count, sum(if(dt='2020-06-14',order_reduce_amount,0)) order_last_1d_reduce_amount, sum(if(dt='2020-06-14',order_original_amount,0)) order_last_1d_original_amount, sum(if(dt='2020-06-14',order_final_amount,0)) order_last_1d_final_amount, sum(if(dt>=date_add('2020-06-14',-6),order_count,0)) order_last_7d_count, sum(if(dt>=date_add('2020-06-14',-6),order_reduce_amount,0)) order_last_7d_reduce_amount, sum(if(dt>=date_add('2020-06-14',-6),order_original_amount,0)) order_last_7d_original_amount, sum(if(dt>=date_add('2020-06-14',-6),order_final_amount,0)) order_last_7d_final_amount, sum(if(dt>=date_add('2020-06-14',-29),order_count,0)) order_last_30d_count, sum(if(dt>=date_add('2020-06-14',-29),order_reduce_amount,0)) order_last_30d_reduce_amount, sum(if(dt>=date_add('2020-06-14',-29),order_original_amount,0)) order_last_30d_original_amount, sum(if(dt>=date_add('2020-06-14',-29),order_final_amount,0)) order_last_30d_final_amount, sum(order_count) order_count, sum(order_reduce_amount) order_reduce_amount, sum(order_original_amount) order_original_amount, sum(order_final_amount) order_final_amount, sum(if(dt='2020-06-14',payment_count,0)) payment_last_1d_count, sum(if(dt='2020-06-14',payment_reduce_amount,0)) payment_last_1d_reduce_amount, sum(if(dt='2020-06-14',payment_amount,0)) payment_last_1d_amount, sum(if(dt>=date_add('2020-06-14',-6),payment_count,0)) payment_last_7d_count, sum(if(dt>=date_add('2020-06-14',-6),payment_reduce_amount,0)) payment_last_7d_reduce_amount, sum(if(dt>=date_add('2020-06-14',-6),payment_amount,0)) payment_last_7d_amount, sum(if(dt>=date_add('2020-06-14',-29),payment_count,0)) payment_last_30d_count, sum(if(dt>=date_add('2020-06-14',-29),payment_reduce_amount,0)) payment_last_30d_reduce_amount, sum(if(dt>=date_add('2020-06-14',-29),payment_amount,0)) payment_last_30d_amount, sum(payment_count) payment_count, sum(payment_reduce_amount) payment_reduce_amount, sum(payment_amount) payment_amount, sum(if(dt='2020-06-14',expire_count,0)) expire_last_1d_count, sum(if(dt>=date_add('2020-06-14',-6),expire_count,0)) expire_last_7d_count, sum(if(dt>=date_add('2020-06-14',-29),expire_count,0)) expire_last_30d_count, sum(expire_count) expire_count from dws_coupon_info_daycount group by coupon_id )t2 on t1.id=t2.coupon_id;

(2)每日导入

复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
insert overwrite table dwt_coupon_topic partition(dt='2020-06-15') select nvl(1d_ago.coupon_id,old.coupon_id), nvl(1d_ago.get_count,0), nvl(old.get_last_7d_count,0)+nvl(1d_ago.get_count,0)- nvl(7d_ago.get_count,0), nvl(old.get_last_30d_count,0)+nvl(1d_ago.get_count,0)- nvl(30d_ago.get_count,0), nvl(old.get_count,0)+nvl(1d_ago.get_count,0), nvl(1d_ago.order_count,0), nvl(1d_ago.order_reduce_amount,0.0), nvl(1d_ago.order_original_amount,0.0), nvl(1d_ago.order_final_amount,0.0), nvl(old.order_last_7d_count,0)+nvl(1d_ago.order_count,0)- nvl(7d_ago.order_count,0), nvl(old.order_last_7d_reduce_amount,0.0)+nvl(1d_ago.order_reduce_amount,0.0)- nvl(7d_ago.order_reduce_amount,0.0), nvl(old.order_last_7d_original_amount,0.0)+nvl(1d_ago.order_original_amount,0.0)- nvl(7d_ago.order_original_amount,0.0), nvl(old.order_last_7d_final_amount,0.0)+nvl(1d_ago.order_final_amount,0.0)- nvl(7d_ago.order_final_amount,0.0), nvl(old.order_last_30d_count,0)+nvl(1d_ago.order_count,0)- nvl(30d_ago.order_count,0), nvl(old.order_last_30d_reduce_amount,0.0)+nvl(1d_ago.order_reduce_amount,0.0)- nvl(30d_ago.order_reduce_amount,0.0), nvl(old.order_last_30d_original_amount,0.0)+nvl(1d_ago.order_original_amount,0.0)- nvl(30d_ago.order_original_amount,0.0), nvl(old.order_last_30d_final_amount,0.0)+nvl(1d_ago.order_final_amount,0.0)- nvl(30d_ago.order_final_amount,0.0), nvl(old.order_count,0)+nvl(1d_ago.order_count,0), nvl(old.order_reduce_amount,0.0)+nvl(1d_ago.order_reduce_amount,0.0), nvl(old.order_original_amount,0.0)+nvl(1d_ago.order_original_amount,0.0), nvl(old.order_final_amount,0.0)+nvl(1d_ago.order_final_amount,0.0), nvl(old.payment_last_1d_count,0)+nvl(1d_ago.payment_count,0)- nvl(1d_ago.payment_count,0), nvl(old.payment_last_1d_reduce_amount,0.0)+nvl(1d_ago.payment_reduce_amount,0.0)- nvl(1d_ago.payment_reduce_amount,0.0), nvl(old.payment_last_1d_amount,0.0)+nvl(1d_ago.payment_amount,0.0)- nvl(1d_ago.payment_amount,0.0), nvl(old.payment_last_7d_count,0)+nvl(1d_ago.payment_count,0)- nvl(7d_ago.payment_count,0), nvl(old.payment_last_7d_reduce_amount,0.0)+nvl(1d_ago.payment_reduce_amount,0.0)- nvl(7d_ago.payment_reduce_amount,0.0), nvl(old.payment_last_7d_amount,0.0)+nvl(1d_ago.payment_amount,0.0)- nvl(7d_ago.payment_amount,0.0), nvl(old.payment_last_30d_count,0)+nvl(1d_ago.payment_count,0)- nvl(30d_ago.payment_count,0), nvl(old.payment_last_30d_reduce_amount,0.0)+nvl(1d_ago.payment_reduce_amount,0.0)- nvl(30d_ago.payment_reduce_amount,0.0), nvl(old.payment_last_30d_amount,0.0)+nvl(1d_ago.payment_amount,0.0)- nvl(30d_ago.payment_amount,0.0), nvl(old.payment_count,0)+nvl(1d_ago.payment_count,0), nvl(old.payment_reduce_amount,0.0)+nvl(1d_ago.payment_reduce_amount,0.0), nvl(old.payment_amount,0.0)+nvl(1d_ago.payment_amount,0.0), nvl(1d_ago.expire_count,0), nvl(old.expire_last_7d_count,0)+nvl(1d_ago.expire_count,0)- nvl(7d_ago.expire_count,0), nvl(old.expire_last_30d_count,0)+nvl(1d_ago.expire_count,0)- nvl(30d_ago.expire_count,0), nvl(old.expire_count,0)+nvl(1d_ago.expire_count,0) from ( select coupon_id, get_last_1d_count, get_last_7d_count, get_last_30d_count, get_count, order_last_1d_count, order_last_1d_reduce_amount, order_last_1d_original_amount, order_last_1d_final_amount, order_last_7d_count, order_last_7d_reduce_amount, order_last_7d_original_amount, order_last_7d_final_amount, order_last_30d_count, order_last_30d_reduce_amount, order_last_30d_original_amount, order_last_30d_final_amount, order_count, order_reduce_amount, order_original_amount, order_final_amount, payment_last_1d_count, payment_last_1d_reduce_amount, payment_last_1d_amount, payment_last_7d_count, payment_last_7d_reduce_amount, payment_last_7d_amount, payment_last_30d_count, payment_last_30d_reduce_amount, payment_last_30d_amount, payment_count, payment_reduce_amount, payment_amount, expire_last_1d_count, expire_last_7d_count, expire_last_30d_count, expire_count from dwt_coupon_topic where dt=date_add('2020-06-15',-1) )old full outer join ( select coupon_id, get_count, order_count, order_reduce_amount, order_original_amount, order_final_amount, payment_count, payment_reduce_amount, payment_amount, expire_count from dws_coupon_info_daycount where dt='2020-06-15' )1d_ago on old.coupon_id=1d_ago.coupon_id left join ( select coupon_id, get_count, order_count, order_reduce_amount, order_original_amount, order_final_amount, payment_count, payment_reduce_amount, payment_amount, expire_count from dws_coupon_info_daycount where dt=date_add('2020-06-15',-7) )7d_ago on old.coupon_id=7d_ago.coupon_id left join ( select coupon_id, get_count, order_count, order_reduce_amount, order_original_amount, order_final_amount, payment_count, payment_reduce_amount, payment_amount, expire_count from dws_coupon_info_daycount where dt=date_add('2020-06-15',-30) )30d_ago on old.coupon_id=30d_ago.coupon_id;

三 活动主题

1 建表语句

复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
DROP TABLE IF EXISTS dwt_activity_topic; CREATE EXTERNAL TABLE dwt_activity_topic( `activity_rule_id` STRING COMMENT '活动规则ID', `activity_id` STRING COMMENT '活动ID', `order_last_1d_count` BIGINT COMMENT '最近1日参与某活动某规则下单次数', `order_last_1d_reduce_amount` DECIMAL(16,2) COMMENT '最近1日参与某活动某规则下单优惠金额', `order_last_1d_original_amount` DECIMAL(16,2) COMMENT '最近1日参与某活动某规则下单原始金额', `order_last_1d_final_amount` DECIMAL(16,2) COMMENT '最近1日参与某活动某规则下单最终金额', `order_count` BIGINT COMMENT '参与某活动某规则累积下单次数', `order_reduce_amount` DECIMAL(16,2) COMMENT '参与某活动某规则累积下单优惠金额', `order_original_amount` DECIMAL(16,2) COMMENT '参与某活动某规则累积下单原始金额', `order_final_amount` DECIMAL(16,2) COMMENT '参与某活动某规则累积下单最终金额', `payment_last_1d_count` BIGINT COMMENT '最近1日参与某活动某规则支付次数', `payment_last_1d_reduce_amount` DECIMAL(16,2) COMMENT '最近1日参与某活动某规则支付优惠金额', `payment_last_1d_amount` DECIMAL(16,2) COMMENT '最近1日参与某活动某规则支付金额', `payment_count` BIGINT COMMENT '参与某活动某规则累积支付次数', `payment_reduce_amount` DECIMAL(16,2) COMMENT '参与某活动某规则累积支付优惠金额', `payment_amount` DECIMAL(16,2) COMMENT '参与某活动某规则累积支付金额' ) COMMENT '活动主题宽表' PARTITIONED BY (`dt` STRING) STORED AS ORC LOCATION '/warehouse/gmall/dwt/dwt_activity_topic/' TBLPROPERTIES ("orc.compress"="snappy");

2 数据导入

(1)首日导入

复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
insert overwrite table dwt_activity_topic partition(dt='2020-06-14') select t1.activity_rule_id, t1.activity_id, nvl(order_last_1d_count,0), nvl(order_last_1d_reduce_amount,0), nvl(order_last_1d_original_amount,0), nvl(order_last_1d_final_amount,0), nvl(order_count,0), nvl(order_reduce_amount,0), nvl(order_original_amount,0), nvl(order_final_amount,0), nvl(payment_last_1d_count,0), nvl(payment_last_1d_reduce_amount,0), nvl(payment_last_1d_amount,0), nvl(payment_count,0), nvl(payment_reduce_amount,0), nvl(payment_amount,0) from ( select activity_rule_id, activity_id from dim_activity_rule_info where dt='2020-06-14' )t1 left join ( select activity_rule_id, activity_id, sum(if(dt='2020-06-14',order_count,0)) order_last_1d_count, sum(if(dt='2020-06-14',order_reduce_amount,0)) order_last_1d_reduce_amount, sum(if(dt='2020-06-14',order_original_amount,0)) order_last_1d_original_amount, sum(if(dt='2020-06-14',order_final_amount,0)) order_last_1d_final_amount, sum(order_count) order_count, sum(order_reduce_amount) order_reduce_amount, sum(order_original_amount) order_original_amount, sum(order_final_amount) order_final_amount, sum(if(dt='2020-06-14',payment_count,0)) payment_last_1d_count, sum(if(dt='2020-06-14',payment_reduce_amount,0)) payment_last_1d_reduce_amount, sum(if(dt='2020-06-14',payment_amount,0)) payment_last_1d_amount, sum(payment_count) payment_count, sum(payment_reduce_amount) payment_reduce_amount, sum(payment_amount) payment_amount from dws_activity_info_daycount group by activity_rule_id,activity_id )t2 on t1.activity_rule_id=t2.activity_rule_id and t1.activity_id=t2.activity_id;

(2)每日导入

复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
insert overwrite table dwt_activity_topic partition(dt='2020- insert overwrite table dwt_activity_topic partition(dt='2020-06-15') select nvl(1d_ago.activity_rule_id,old.activity_rule_id), nvl(1d_ago.activity_id,old.activity_id), nvl(1d_ago.order_count,0), nvl(1d_ago.order_reduce_amount,0.0), nvl(1d_ago.order_original_amount,0.0), nvl(1d_ago.order_final_amount,0.0), nvl(old.order_count,0)+nvl(1d_ago.order_count,0), nvl(old.order_reduce_amount,0.0)+nvl(1d_ago.order_reduce_amount,0.0), nvl(old.order_original_amount,0.0)+nvl(1d_ago.order_original_amount,0.0), nvl(old.order_final_amount,0.0)+nvl(1d_ago.order_final_amount,0.0), nvl(1d_ago.payment_count,0), nvl(1d_ago.payment_reduce_amount,0.0), nvl(1d_ago.payment_amount,0.0), nvl(old.payment_count,0)+nvl(1d_ago.payment_count,0), nvl(old.payment_reduce_amount,0.0)+nvl(1d_ago.payment_reduce_amount,0.0), nvl(old.payment_amount,0.0)+nvl(1d_ago.payment_amount,0.0) from ( select activity_rule_id, activity_id, order_count, order_reduce_amount, order_original_amount, order_final_amount, payment_count, payment_reduce_amount, payment_amount from dwt_activity_topic where dt=date_add('2020-06-15',-1) )old full outer join ( select activity_rule_id, activity_id, order_count, order_reduce_amount, order_original_amount, order_final_amount, payment_count, payment_reduce_amount, payment_amount from dws_activity_info_daycount where dt='2020-06-15' )1d_ago on old.activity_rule_id=1d_ago.activity_rule_id;

四 地区主题

1 建表语句

复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
DROP TABLE IF EXISTS dwt_area_topic; CREATE EXTERNAL TABLE dwt_area_topic( `province_id` STRING COMMENT '编号', `visit_last_1d_count` BIGINT COMMENT '最近1日访客访问次数', `login_last_1d_count` BIGINT COMMENT '最近1日用户访问次数', `visit_last_7d_count` BIGINT COMMENT '最近7访客访问次数', `login_last_7d_count` BIGINT COMMENT '最近7日用户访问次数', `visit_last_30d_count` BIGINT COMMENT '最近30日访客访问次数', `login_last_30d_count` BIGINT COMMENT '最近30日用户访问次数', `visit_count` BIGINT COMMENT '累积访客访问次数', `login_count` BIGINT COMMENT '累积用户访问次数', `order_last_1d_count` BIGINT COMMENT '最近1天下单次数', `order_last_1d_original_amount` DECIMAL(16,2) COMMENT '最近1天下单原始金额', `order_last_1d_final_amount` DECIMAL(16,2) COMMENT '最近1天下单最终金额', `order_last_7d_count` BIGINT COMMENT '最近7天下单次数', `order_last_7d_original_amount` DECIMAL(16,2) COMMENT '最近7天下单原始金额', `order_last_7d_final_amount` DECIMAL(16,2) COMMENT '最近7天下单最终金额', `order_last_30d_count` BIGINT COMMENT '最近30天下单次数', `order_last_30d_original_amount` DECIMAL(16,2) COMMENT '最近30天下单原始金额', `order_last_30d_final_amount` DECIMAL(16,2) COMMENT '最近30天下单最终金额', `order_count` BIGINT COMMENT '累积下单次数', `order_original_amount` DECIMAL(16,2) COMMENT '累积下单原始金额', `order_final_amount` DECIMAL(16,2) COMMENT '累积下单最终金额', `payment_last_1d_count` BIGINT COMMENT '最近1天支付次数', `payment_last_1d_amount` DECIMAL(16,2) COMMENT '最近1天支付金额', `payment_last_7d_count` BIGINT COMMENT '最近7天支付次数', `payment_last_7d_amount` DECIMAL(16,2) COMMENT '最近7天支付金额', `payment_last_30d_count` BIGINT COMMENT '最近30天支付次数', `payment_last_30d_amount` DECIMAL(16,2) COMMENT '最近30天支付金额', `payment_count` BIGINT COMMENT '累积支付次数', `payment_amount` DECIMAL(16,2) COMMENT '累积支付金额', `refund_order_last_1d_count` BIGINT COMMENT '最近1天退单次数', `refund_order_last_1d_amount` DECIMAL(16,2) COMMENT '最近1天退单金额', `refund_order_last_7d_count` BIGINT COMMENT '最近7天退单次数', `refund_order_last_7d_amount` DECIMAL(16,2) COMMENT '最近7天退单金额', `refund_order_last_30d_count` BIGINT COMMENT '最近30天退单次数', `refund_order_last_30d_amount` DECIMAL(16,2) COMMENT '最近30天退单金额', `refund_order_count` BIGINT COMMENT '累积退单次数', `refund_order_amount` DECIMAL(16,2) COMMENT '累积退单金额', `refund_payment_last_1d_count` BIGINT COMMENT '最近1天退款次数', `refund_payment_last_1d_amount` DECIMAL(16,2) COMMENT '最近1天退款金额', `refund_payment_last_7d_count` BIGINT COMMENT '最近7天退款次数', `refund_payment_last_7d_amount` DECIMAL(16,2) COMMENT '最近7天退款金额', `refund_payment_last_30d_count` BIGINT COMMENT '最近30天退款次数', `refund_payment_last_30d_amount` DECIMAL(16,2) COMMENT '最近30天退款金额', `refund_payment_count` BIGINT COMMENT '累积退款次数', `refund_payment_amount` DECIMAL(16,2) COMMENT '累积退款金额' ) COMMENT '地区主题宽表' PARTITIONED BY (`dt` STRING) STORED AS ORC LOCATION '/warehouse/gmall/dwt/dwt_area_topic/' TBLPROPERTIES ("orc.compress"="snappy");

2 数据导入

(1)首日导入

复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
insert overwrite table dwt_area_topic partition(dt='2020-06-14') select id, nvl(visit_last_1d_count,0), nvl(login_last_1d_count,0), nvl(visit_last_7d_count,0), nvl(login_last_7d_count,0), nvl(visit_last_30d_count,0), nvl(login_last_30d_count,0), nvl(visit_count,0), nvl(login_count,0), nvl(order_last_1d_count,0), nvl(order_last_1d_original_amount,0), nvl(order_last_1d_final_amount,0), nvl(order_last_7d_count,0), nvl(order_last_7d_original_amount,0), nvl(order_last_7d_final_amount,0), nvl(order_last_30d_count,0), nvl(order_last_30d_original_amount,0), nvl(order_last_30d_final_amount,0), nvl(order_count,0), nvl(order_original_amount,0), nvl(order_final_amount,0), nvl(payment_last_1d_count,0), nvl(payment_last_1d_amount,0), nvl(payment_last_7d_count,0), nvl(payment_last_7d_amount,0), nvl(payment_last_30d_count,0), nvl(payment_last_30d_amount,0), nvl(payment_count,0), nvl(payment_amount,0), nvl(refund_order_last_1d_count,0), nvl(refund_order_last_1d_amount,0), nvl(refund_order_last_7d_count,0), nvl(refund_order_last_7d_amount,0), nvl(refund_order_last_30d_count,0), nvl(refund_order_last_30d_amount,0), nvl(refund_order_count,0), nvl(refund_order_amount,0), nvl(refund_payment_last_1d_count,0), nvl(refund_payment_last_1d_amount,0), nvl(refund_payment_last_7d_count,0), nvl(refund_payment_last_7d_amount,0), nvl(refund_payment_last_30d_count,0), nvl(refund_payment_last_30d_amount,0), nvl(refund_payment_count,0), nvl(refund_payment_amount,0) from ( select id from dim_base_province )t1 left join ( select province_id province_id, sum(if(dt='2020-06-14',visit_count,0)) visit_last_1d_count, sum(if(dt='2020-06-14',login_count,0)) login_last_1d_count, sum(if(dt>=date_add('2020-06-14',-6),visit_count,0)) visit_last_7d_count, sum(if(dt>=date_add('2020-06-14',-6),login_count,0)) login_last_7d_count, sum(if(dt>=date_add('2020-06-14',-29),visit_count,0)) visit_last_30d_count, sum(if(dt>=date_add('2020-06-14',-29),login_count,0)) login_last_30d_count, sum(visit_count) visit_count, sum(login_count) login_count, sum(if(dt='2020-06-14',order_count,0)) order_last_1d_count, sum(if(dt='2020-06-14',order_original_amount,0)) order_last_1d_original_amount, sum(if(dt='2020-06-14',order_final_amount,0)) order_last_1d_final_amount, sum(if(dt>=date_add('2020-06-14',-6),order_count,0)) order_last_7d_count, sum(if(dt>=date_add('2020-06-14',-6),order_original_amount,0)) order_last_7d_original_amount, sum(if(dt>=date_add('2020-06-14',-6),order_final_amount,0)) order_last_7d_final_amount, sum(if(dt>=date_add('2020-06-14',-29),order_count,0)) order_last_30d_count, sum(if(dt>=date_add('2020-06-14',-29),order_original_amount,0)) order_last_30d_original_amount, sum(if(dt>=date_add('2020-06-14',-29),order_final_amount,0)) order_last_30d_final_amount, sum(order_count) order_count, sum(order_original_amount) order_original_amount, sum(order_final_amount) order_final_amount, sum(if(dt='2020-06-14',payment_count,0)) payment_last_1d_count, sum(if(dt='2020-06-14',payment_amount,0)) payment_last_1d_amount, sum(if(dt>=date_add('2020-06-14',-6),payment_count,0)) payment_last_7d_count, sum(if(dt>=date_add('2020-06-14',-6),payment_amount,0)) payment_last_7d_amount, sum(if(dt>=date_add('2020-06-14',-29),payment_count,0)) payment_last_30d_count, sum(if(dt>=date_add('2020-06-14',-29),payment_amount,0)) payment_last_30d_amount, sum(payment_count) payment_count, sum(payment_amount) payment_amount, sum(if(dt='2020-06-14',refund_order_count,0)) refund_order_last_1d_count, sum(if(dt='2020-06-14',refund_order_amount,0)) refund_order_last_1d_amount, sum(if(dt>=date_add('2020-06-14',-6),refund_order_count,0)) refund_order_last_7d_count, sum(if(dt>=date_add('2020-06-14',-6),refund_order_amount,0)) refund_order_last_7d_amount, sum(if(dt>=date_add('2020-06-14',-29),refund_order_count,0)) refund_order_last_30d_count, sum(if(dt>=date_add('2020-06-14',-29),refund_order_amount,0)) refund_order_last_30d_amount, sum(refund_order_count) refund_order_count, sum(refund_order_amount) refund_order_amount, sum(if(dt='2020-06-14',refund_payment_count,0)) refund_payment_last_1d_count, sum(if(dt='2020-06-14',refund_payment_amount,0)) refund_payment_last_1d_amount, sum(if(dt>=date_add('2020-06-14',-6),refund_payment_count,0)) refund_payment_last_7d_count, sum(if(dt>=date_add('2020-06-14',-6),refund_payment_amount,0)) refund_payment_last_7d_amount, sum(if(dt>=date_add('2020-06-14',-29),refund_payment_count,0)) refund_payment_last_30d_count, sum(if(dt>=date_add('2020-06-14',-29),refund_payment_amount,0)) refund_payment_last_30d_amount, sum(refund_payment_count) refund_payment_count, sum(refund_payment_amount) refund_payment_amount from dws_area_stats_daycount group by province_id )t2 on t1.id=t2.province_id;

(2)每日导入

复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
insert overwrite table dwt_area_topic partition(dt='2020-06-15') select nvl(old.province_id, 1d_ago.province_id), nvl(1d_ago.visit_count,0), nvl(1d_ago.login_count,0), nvl(old.visit_last_7d_count,0)+nvl(1d_ago.visit_count,0)- nvl(7d_ago.visit_count,0), nvl(old.login_last_7d_count,0)+nvl(1d_ago.login_count,0)- nvl(7d_ago.login_count,0), nvl(old.visit_last_30d_count,0)+nvl(1d_ago.visit_count,0)- nvl(30d_ago.visit_count,0), nvl(old.login_last_30d_count,0)+nvl(1d_ago.login_count,0)- nvl(30d_ago.login_count,0), nvl(old.visit_count,0)+nvl(1d_ago.visit_count,0), nvl(old.login_count,0)+nvl(1d_ago.login_count,0), nvl(1d_ago.order_count,0), nvl(1d_ago.order_original_amount,0.0), nvl(1d_ago.order_final_amount,0.0), nvl(old.order_last_7d_count,0)+nvl(1d_ago.order_count,0)- nvl(7d_ago.order_count,0), nvl(old.order_last_7d_original_amount,0.0)+nvl(1d_ago.order_original_amount,0.0)- nvl(7d_ago.order_original_amount,0.0), nvl(old.order_last_7d_final_amount,0.0)+nvl(1d_ago.order_final_amount,0.0)- nvl(7d_ago.order_final_amount,0.0), nvl(old.order_last_30d_count,0)+nvl(1d_ago.order_count,0)- nvl(30d_ago.order_count,0), nvl(old.order_last_30d_original_amount,0.0)+nvl(1d_ago.order_original_amount,0.0)- nvl(30d_ago.order_original_amount,0.0), nvl(old.order_last_30d_final_amount,0.0)+nvl(1d_ago.order_final_amount,0.0)- nvl(30d_ago.order_final_amount,0.0), nvl(old.order_count,0)+nvl(1d_ago.order_count,0), nvl(old.order_original_amount,0.0)+nvl(1d_ago.order_original_amount,0.0), nvl(old.order_final_amount,0.0)+nvl(1d_ago.order_final_amount,0.0), nvl(1d_ago.payment_count,0), nvl(1d_ago.payment_amount,0.0), nvl(old.payment_last_7d_count,0)+nvl(1d_ago.payment_count,0)- nvl(7d_ago.payment_count,0), nvl(old.payment_last_7d_amount,0.0)+nvl(1d_ago.payment_amount,0.0)- nvl(7d_ago.payment_amount,0.0), nvl(old.payment_last_30d_count,0)+nvl(1d_ago.payment_count,0)- nvl(30d_ago.payment_count,0), nvl(old.payment_last_30d_amount,0.0)+nvl(1d_ago.payment_amount,0.0)- nvl(30d_ago.payment_amount,0.0), nvl(old.payment_count,0)+nvl(1d_ago.payment_count,0), nvl(old.payment_amount,0.0)+nvl(1d_ago.payment_amount,0.0), nvl(1d_ago.refund_order_count,0), nvl(1d_ago.refund_order_amount,0.0), nvl(old.refund_order_last_7d_count,0)+nvl(1d_ago.refund_order_count,0)- nvl(7d_ago.refund_order_count,0), nvl(old.refund_order_last_7d_amount,0.0)+nvl(1d_ago.refund_order_amount,0.0)- nvl(7d_ago.refund_order_amount,0.0), nvl(old.refund_order_last_30d_count,0)+nvl(1d_ago.refund_order_count,0)- nvl(30d_ago.refund_order_count,0), nvl(old.refund_order_last_30d_amount,0.0)+nvl(1d_ago.refund_order_amount,0.0)- nvl(30d_ago.refund_order_amount,0.0), nvl(old.refund_order_count,0)+nvl(1d_ago.refund_order_count,0), nvl(old.refund_order_amount,0.0)+nvl(1d_ago.refund_order_amount,0.0), nvl(1d_ago.refund_payment_count,0), nvl(1d_ago.refund_payment_amount,0.0), nvl(old.refund_payment_last_7d_count,0)+nvl(1d_ago.refund_payment_count,0)- nvl(7d_ago.refund_payment_count,0), nvl(old.refund_payment_last_7d_amount,0.0)+nvl(1d_ago.refund_payment_amount,0.0)- nvl(7d_ago.refund_payment_amount,0.0), nvl(old.refund_payment_last_30d_count,0)+nvl(1d_ago.refund_payment_count,0)- nvl(30d_ago.refund_payment_count,0), nvl(old.refund_payment_last_30d_amount,0.0)+nvl(1d_ago.refund_payment_amount,0.0)- nvl(30d_ago.refund_payment_amount,0.0), nvl(old.refund_payment_count,0)+nvl(1d_ago.refund_payment_count,0), nvl(old.refund_payment_amount,0.0)+nvl(1d_ago.refund_payment_amount,0.0) from ( select province_id, visit_last_1d_count, login_last_1d_count, visit_last_7d_count, login_last_7d_count, visit_last_30d_count, login_last_30d_count, visit_count, login_count, order_last_1d_count, order_last_1d_original_amount, order_last_1d_final_amount, order_last_7d_count, order_last_7d_original_amount, order_last_7d_final_amount, order_last_30d_count, order_last_30d_original_amount, order_last_30d_final_amount, order_count, order_original_amount, order_final_amount, payment_last_1d_count, payment_last_1d_amount, payment_last_7d_count, payment_last_7d_amount, payment_last_30d_count, payment_last_30d_amount, payment_count, payment_amount, refund_order_last_1d_count, refund_order_last_1d_amount, refund_order_last_7d_count, refund_order_last_7d_amount, refund_order_last_30d_count, refund_order_last_30d_amount, refund_order_count, refund_order_amount, refund_payment_last_1d_count, refund_payment_last_1d_amount, refund_payment_last_7d_count, refund_payment_last_7d_amount, refund_payment_last_30d_count, refund_payment_last_30d_amount, refund_payment_count, refund_payment_amount from dwt_area_topic where dt=date_add('2020-06-15',-1) )old full outer join ( select province_id, visit_count, login_count, order_count, order_original_amount, order_final_amount, payment_count, payment_amount, refund_order_count, refund_order_amount, refund_payment_count, refund_payment_amount from dws_area_stats_daycount where dt='2020-06-15' )1d_ago on old.province_id=1d_ago.province_id left join ( select province_id, visit_count, login_count, order_count, order_original_amount, order_final_amount, payment_count, payment_amount, refund_order_count, refund_order_amount, refund_payment_count, refund_payment_amount from dws_area_stats_daycount where dt=date_add('2020-06-15',-7) )7d_ago on old.province_id= 7d_ago.province_id left join ( select province_id, visit_count, login_count, order_count, order_original_amount, order_final_amount, payment_count, payment_amount, refund_order_count, refund_order_amount, refund_payment_count, refund_payment_amount from dws_area_stats_daycount where dt=date_add('2020-06-15',-30) )30d_ago on old.province_id= 30d_ago.province_id;

最后

以上就是舒适小虾米最近收集整理的关于【电商数仓】数仓搭建之数据主题(data warehouse topic-- DWT)层(DWT层之访客、优惠券、活动、地区主题)一 访客主题二 优惠券主题三 活动主题四 地区主题的全部内容,更多相关【电商数仓】数仓搭建之数据主题(data内容请搜索靠谱客的其他文章。

本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
点赞(65)

评论列表共有 0 条评论

立即
投稿
返回
顶部