概述
常用指标
uCTR – 点击用户数 / 曝光用户数
ctr – 点击次数 / 曝光次数
uCVR – 购买用户数 / 点击用户数
cvr – 购买次数 / 点击次数
ctcvr – 购买用户数 / 曝光用户数
arpu – 成功支付的金额 / 曝光用户数
ecpm – 流水 × 1000 / 曝光次数
package sparkSQL
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
/**
 * Bayesian-smoothing demo for per-theme conversion metrics.
 *
 * The job reads space-separated theme statistics, derives the raw ratios
 * `ctr`, `cvr`, `ctcvr` and `arpu` from the user-count columns, estimates a
 * pair of prior parameters (alpha, beta) per `groupId-theme_ver` group with the
 * method of moments, and prints the raw ratios next to their smoothed
 * counterparts `b_ctr`, `b_cvr`, `b_ctcvr` and `b_arpu`.
 */
object BayesParam {

  /** Input file used when no path is passed on the command line. */
  private val DefaultInputPath =
    "F:\\ideaProjects\\spark-version2\\src\\main\\resources\\bayesparam\\themeInfo"

  /** Names of the eight space-separated input fields, in file order. */
  private val InputColumns = Seq(
    "groupId", "themeId", "theme_ver", "price_level",
    "exposure_users", "click_users", "payment_users", "payment_amt")

  /**
   * Entry point.
   *
   * @param args optional; `args(0)` overrides the default input path
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession
      .builder()
      .appName(this.getClass.getName)
      .master("local[2]")
      .getOrCreate()
    try {
      run(spark, args.headOption.getOrElse(DefaultInputPath))
    } finally {
      // Release the local Spark context even when the job fails.
      spark.stop()
    }
  }

  /** Loads the input, computes raw and smoothed metrics and prints the result. */
  private def run(spark: SparkSession, inputPath: String): Unit = {
    import org.apache.spark.sql.functions._
    import spark.implicits._

    val themeInfoDF = spark.sparkContext
      .textFile(inputPath)
      // A blank (e.g. trailing) line would otherwise fail on arr(1).
      .filter(_.trim.nonEmpty)
      .map { line =>
        val arr = line.split(" ")
        (arr(0), arr(1), arr(2), arr(3), arr(4), arr(5), arr(6), arr(7))
      }
      .toDF(InputColumns: _*)

    val key = "key"
    val values = Seq("ctr", "cvr", "ctcvr", "arpu")

    // Raw ratios; the string count columns are implicitly cast by the division.
    val themeInfoCtrDF = themeInfoDF
      .withColumn(key, concat_ws("-", col("groupId"), col("theme_ver")))
      .withColumn("ctr", col("click_users") / col("exposure_users"))
      .withColumn("cvr", col("payment_users") / col("click_users"))
      .withColumn("ctcvr", col("payment_users") / col("exposure_users"))
      .withColumn("arpu", col("payment_amt") / col("exposure_users"))
      .select(key, InputColumns ++ values: _*)

    // Builds a UDF that looks up one slot of a per-key parameter table,
    // falling back to 0.0 for keys that are not in the table.
    def lookup(table: Map[String, Array[Double]], slot: Int) =
      udf((k: String) => table.get(k).map(_(slot)).getOrElse(0.0))

    // One row per group key, carrying alpha/beta for every metric
    // (plus mean/variance for ctr only).
    val paramsDF = values.foldLeft(themeInfoCtrDF.select(key).distinct()) { (acc, value) =>
      val meanAndVarianceMap = evaluateMeanAndVariance(key, value, themeInfoCtrDF)
      val alphaAndBetaMap = evaluateAlphaAndBeta(meanAndVarianceMap)
      val withStats =
        if (value == "ctr") {
          acc
            .withColumn(value + "_mean", lookup(meanAndVarianceMap, 0)(col(key)))
            .withColumn(value + "_var", lookup(meanAndVarianceMap, 1)(col(key)))
        } else {
          acc
        }
      withStats
        .withColumn(value + "_a", lookup(alphaAndBetaMap, 0)(col(key)))
        .withColumn(value + "_b", lookup(alphaAndBetaMap, 1)(col(key)))
    }

    // (numerator + alpha) / (denominator + alpha + beta) for the given metric.
    def smoothed(numerator: String, denominator: String, metric: String) =
      (col(numerator) + col(metric + "_a")) /
        (col(denominator) + col(metric + "_a") + col(metric + "_b"))

    val resultDF = themeInfoCtrDF
      .join(paramsDF, Seq(key))
      .withColumn("b_ctr", smoothed("click_users", "exposure_users", "ctr"))
      .withColumn("b_cvr", smoothed("payment_users", "click_users", "cvr"))
      .withColumn("b_ctcvr", smoothed("payment_users", "exposure_users", "ctcvr"))
      // Uses the arpu parameters; the ctcvr ones were applied here by mistake before.
      .withColumn("b_arpu", smoothed("payment_amt", "exposure_users", "arpu"))
      .select(
        key,
        InputColumns ++ values ++ Seq(
          "ctr_mean", "ctr_var", "ctr_a", "ctr_b",
          "b_ctr", "b_cvr", "b_ctcvr", "b_arpu"): _*)

    resultDF.show()
  }

  /**
   * Method-of-moments estimate of (alpha, beta) from each key's mean and variance:
   * `t = mean * (1 - mean) / variance - 1`, `alpha = mean * t`, `beta = (1 - mean) * t`.
   *
   * When `t` is not a finite positive number (zero or NaN variance, mean of 0 or 1,
   * variance larger than `mean * (1 - mean)`), the estimate is undefined and
   * `(0.0, 0.0)` is returned, which leaves the raw ratio unsmoothed instead of
   * producing Infinity/NaN.
   *
   * @param meanAndVarianceMap key -> Array(mean, variance)
   * @return key -> Array(alpha, beta)
   */
  def evaluateAlphaAndBeta(meanAndVarianceMap: Map[String, Array[Double]]): Map[String, Array[Double]] = {
    meanAndVarianceMap.map { case (key, meanAndVariance) =>
      val mean = meanAndVariance(0)
      val variance = meanAndVariance(1)
      val t = mean * (1 - mean) / variance - 1
      val usable = !t.isNaN && !t.isInfinity && t > 0
      val params = if (usable) Array(mean * t, (1 - mean) * t) else Array(0.0, 0.0)
      key -> params
    }
  }

  /**
   * Computes the average and sample variance of column `value` per group key.
   *
   * If `themeInfoCtrDF` has no column named `key`, it is derived as
   * `groupId + "-" + theme_ver`. Aggregates that come back as SQL NULL (for
   * example the sample variance of a single-row group on Spark versions that
   * return NULL there) are reported as `Double.NaN`.
   *
   * @param key            name of the grouping column
   * @param value          name of the metric column to aggregate
   * @param themeInfoCtrDF input rows
   * @return key -> Array(mean, variance), collected to the driver
   */
  def evaluateMeanAndVariance(key: String, value: String, themeInfoCtrDF: DataFrame): Map[String, Array[Double]] = {
    import org.apache.spark.sql.functions._

    val keyed =
      if (themeInfoCtrDF.columns.contains(key)) themeInfoCtrDF
      else themeInfoCtrDF.withColumn(key, concat_ws("-", col("groupId"), col("theme_ver")))

    def doubleOrNaN(row: Row, i: Int): Double =
      if (row.isNullAt(i)) Double.NaN else row.getDouble(i)

    keyed
      .groupBy(key)
      .agg(avg(value) as "mean", variance(value) as "variance")
      .select(key, "mean", "variance")
      .collect()
      .map(row => row.getString(0) -> Array(doubleOrNaN(row, 1), doubleOrNaN(row, 2)))
      .toMap
  }
}
测试数据:
P3105 10008611 10 0 100 10 1 2
P3105 10008612 10 1 3000 150 10 30
P3105 10008613 10 0 4000 400 20 40
P3105 10008614 10 1 5000 1000 100 300
P3105 10008615 10 0 10000 1000 200 600
P3302 10008811 10 0 100 10 1 3
P3302 10008812 10 1 3000 150 10 20
P3302 10008813 10 0 4000 400 20 40
P3302 10008814 10 1 5000 1000 100 300
P3302 10008815 10 0 10000 1000 200 400
P6105 10009911 10 0 100 10 1 4
P6105 10009912 10 1 3000 150 10 30
P6105 10009913 10 0 4000 400 20 50
P6105 10009914 10 1 5000 1000 100 300
P6105 10009915 10 0 10000 1000 200 400
P6106 10006611 10 0 1000 500 50 600
P6106 10006612 10 0 10000 1000 200 800
P6107 10006611 10 0 10000 1000 200 800
P6107 10006612 10 0 10000 1000 200 800
测试结果:
+--------+-------+--------+---------+-----------+--------------+-----------+-------------+-----------+----+-------------------+--------------------+--------------------+-------------------+-------------------+------------------+------------------+-------------------+--------------------+--------------------+--------------------+
| key|groupId| themeId|theme_ver|price_level|exposure_users|click_users|payment_users|payment_amt| ctr| cvr| ctcvr| arpu| ctr_mean| ctr_var| ctr_a| ctr_b| b_ctr| b_cvr| b_ctcvr| b_arpu|
+--------+-------+--------+---------+-----------+--------------+-----------+-------------+-----------+----+-------------------+--------------------+--------------------+-------------------+-------------------+------------------+------------------+-------------------+--------------------+--------------------+--------------------+
|P3105-10| P3105|10008611| 10| 0| 100| 10| 1| 2| 0.1| 0.1| 0.01| 0.02|0.11000000000000001| 0.003| 3.479666666666668|28.153666666666673| 0.102403140035452| 0.1024160952724648|0.011070317361543248|0.014648413192283759|
|P3105-10| P3105|10008612| 10| 1| 3000| 150| 10| 30|0.05|0.06666666666666667|0.003333333333333...| 0.01|0.11000000000000001| 0.003| 3.479666666666668|28.153666666666673|0.05062606515739591| 0.07214376022608955|0.003803741384968...|0.010094081610327097|
|P3105-10| P3105|10008613| 10| 0| 4000| 400| 20| 40| 0.1| 0.05| 0.005| 0.01|0.11000000000000001| 0.003| 3.479666666666668|28.153666666666673|0.10007846282317341|0.053295138194626016|0.005286284953395472|0.010071571238348868|
|P3105-10| P3105|10008614| 10| 1| 5000| 1000| 100| 300| 0.2| 0.1| 0.02| 0.06|0.11000000000000001| 0.003| 3.479666666666668|28.153666666666673|0.19943417975607652| 0.10008554981248043| 0.01971123497414546| 0.05832516285004365|
|P3105-10| P3105|10008615| 10| 0| 10000| 1000| 200| 600| 0.1| 0.2| 0.02| 0.06|0.11000000000000001| 0.003| 3.479666666666668|28.153666666666673|0.10003153358210194| 0.1975190554380674|0.019853071823959543| 0.05914781657896534|
|P6105-10| P6105|10009911| 10| 0| 100| 10| 1| 4| 0.1| 0.1| 0.01| 0.04|0.11000000000000001| 0.003| 3.479666666666668|28.153666666666673| 0.102403140035452| 0.1024160952724648|0.011070317361543248| 0.02180460485376478|
|P6105-10| P6105|10009912| 10| 1| 3000| 150| 10| 30|0.05|0.06666666666666667|0.003333333333333...| 0.01|0.11000000000000001| 0.003| 3.479666666666668|28.153666666666673|0.05062606515739591| 0.07214376022608955|0.003803741384968...|0.010094081610327097|
|P6105-10| P6105|10009913| 10| 0| 4000| 400| 20| 50| 0.1| 0.05| 0.005| 0.0125|0.11000000000000001| 0.003| 3.479666666666668|28.153666666666673|0.10007846282317341|0.053295138194626016|0.005286284953395472|0.012464214380825567|
|P6105-10| P6105|10009914| 10| 1| 5000| 1000| 100| 300| 0.2| 0.1| 0.02| 0.06|0.11000000000000001| 0.003| 3.479666666666668|28.153666666666673|0.19943417975607652| 0.10008554981248043| 0.01971123497414546| 0.05832516285004365|
|P6105-10| P6105|10009915| 10| 0| 10000| 1000| 200| 400| 0.1| 0.2| 0.02| 0.04|0.11000000000000001| 0.003| 3.479666666666668|28.153666666666673|0.10003153358210194| 0.1975190554380674|0.019853071823959543| 0.03950044420146245|
|P6106-10| P6106|10006611| 10| 0| 1000| 500| 50| 600| 0.5| 0.1| 0.05| 0.6| 0.3|0.08000000000000002|0.4874999999999998|1.1374999999999995| 0.4996755272681892| 0.1023355576739752| 0.04896575803031086| 0.5610435524750427|
|P6106-10| P6106|10006612| 10| 0| 10000| 1000| 200| 800| 0.1| 0.2| 0.02| 0.08| 0.3|0.08000000000000002|0.4874999999999998|1.1374999999999995|0.10003249471960805| 0.19880429477794048|0.020110266746813873| 0.0796691997595584|
|P6107-10| P6107|10006611| 10| 0| 10000| 1000| 200| 800| 0.1| 0.2| 0.02| 0.08| 0.1| 0.0| Infinity| Infinity| NaN| NaN| NaN| NaN|
|P6107-10| P6107|10006612| 10| 0| 10000| 1000| 200| 800| 0.1| 0.2| 0.02| 0.08| 0.1| 0.0| Infinity| Infinity| NaN| NaN| NaN| NaN|
|P3302-10| P3302|10008811| 10| 0| 100| 10| 1| 3| 0.1| 0.1| 0.01| 0.03|0.11000000000000001| 0.003| 3.479666666666668|28.153666666666673| 0.102403140035452| 0.1024160952724648|0.011070317361543248| 0.01822650902302427|
|P3302-10| P3302|10008812| 10| 1| 3000| 150| 10| 20|0.05|0.06666666666666667|0.003333333333333...|0.006666666666666667|0.11000000000000001| 0.003| 3.479666666666668|28.153666666666673|0.05062606515739591| 0.07214376022608955|0.003803741384968...| 0.00694891149764796|
|P3302-10| P3302|10008813| 10| 0| 4000| 400| 20| 40| 0.1| 0.05| 0.005| 0.01|0.11000000000000001| 0.003| 3.479666666666668|28.153666666666673|0.10007846282317341|0.053295138194626016|0.005286284953395472|0.010071571238348868|
|P3302-10| P3302|10008814| 10| 1| 5000| 1000| 100| 300| 0.2| 0.1| 0.02| 0.06|0.11000000000000001| 0.003| 3.479666666666668|28.153666666666673|0.19943417975607652| 0.10008554981248043| 0.01971123497414546| 0.05832516285004365|
|P3302-10| P3302|10008815| 10| 0| 10000| 1000| 200| 400| 0.1| 0.2| 0.02| 0.04|0.11000000000000001| 0.003| 3.479666666666668|28.153666666666673|0.10003153358210194| 0.1975190554380674|0.019853071823959543| 0.03950044420146245|
+--------+-------+--------+---------+-----------+--------------+-----------+-------------+-----------+----+-------------------+--------------------+--------------------+-------------------+-------------------+------------------+------------------+-------------------+--------------------+--------------------+--------------------+
最后
以上就是无奈巨人为你收集整理的贝叶斯平滑的全部内容,希望文章能够帮你解决贝叶斯平滑所遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复