package mllib
import org.apache.log4j.{Level, Logger}
import org.apache.spark.{SparkContext, SparkConf}
import scala.collection.mutable.Map
/**
 * Created by 汪本成 on 2016/8/4.
 *
 * Small user-based collaborative-filtering demo. It keeps a hard-coded
 * user -> (film -> rating) table in `source` and prints the cosine similarity
 * of every pair of users' rating vectors.
 */
object UserSimilar {
  // Silence log output that is not needed on the terminal.
  Logger.getLogger("org.apache.spark").setLevel(Level.WARN)
  // Jetty's packages live under org.eclipse.jetty; the logger name used before
  // ("org.apache.eclipse.jetty.server") does not correspond to any Jetty package.
  Logger.getLogger("org.eclipse.jetty.server").setLevel(Level.OFF)

  // The runtime class name of a Scala object ends in '$'; strip it so the
  // application name is readable.
  private val appName: String = this.getClass.getSimpleName.filter(_ != '$')

  // Program entry: a single-threaded local Spark context.
  val conf: SparkConf = new SparkConf().setMaster("local[1]").setAppName(appName)
  println(appName)
  val sc: SparkContext = new SparkContext(conf)

  // User names.
  val users = sc.parallelize(Array("张三", "李四", "王五", "赵六", "阿七"))
  // Film titles (not referenced elsewhere in this object).
  val films = sc.parallelize(Array("逆战", "人间", "鬼屋", "西游记", "雪豹"))

  // Nested map: user name -> (film title -> rating).
  val source = Map[String, Map[String, Int]]()
  // Map intended to hold film ratings (not referenced elsewhere in this object).
  val filmSource = Map[String, Int]()

  /**
   * Fills `source` with the hard-coded ratings of the five demo users.
   * Calling it again simply overwrites the same five entries.
   *
   * @return the populated `source` map (the same instance, not a copy)
   */
  def getSource(): Map[String, Map[String, Int]] = {
    source += ("张三" -> Map("逆战" -> 2, "人间" -> 3, "鬼屋" -> 1, "西游记" -> 0, "雪豹" -> 1))
    source += ("李四" -> Map("逆战" -> 1, "人间" -> 2, "鬼屋" -> 2, "西游记" -> 1, "雪豹" -> 4))
    source += ("王五" -> Map("逆战" -> 2, "人间" -> 1, "鬼屋" -> 0, "西游记" -> 1, "雪豹" -> 4))
    source += ("赵六" -> Map("逆战" -> 3, "人间" -> 2, "鬼屋" -> 0, "西游记" -> 5, "雪豹" -> 3))
    source += ("阿七" -> Map("逆战" -> 5, "人间" -> 3, "鬼屋" -> 1, "西游记" -> 1, "雪豹" -> 2))
    source
  }

  /**
   * Cosine similarity between the rating vectors of two users.
   *
   * Ratings are paired by film title, so the result does not depend on the
   * iteration order of the underlying maps. A film that only one of the two
   * users has rated counts as a rating of 0 for the other user.
   *
   * @param user1 name of the first user; must be a key of `source`
   * @param user2 name of the second user; must be a key of `source`
   * @return dot(user1, user2) / (|user1| * |user2|), or 0.0 when either vector
   *         has zero length (all ratings 0, or no ratings at all)
   * @throws NoSuchElementException if either user has no entry in `source`
   */
  def getCollaborateSource(user1: String, user2: String): Double = {
    val ratings1 = ratingsOf(user1)
    val ratings2 = ratingsOf(user2)
    // Numerator: sum of products of the two users' ratings for the same film.
    val dotProduct = ratings1.iterator.map { case (film, score) =>
      score.toDouble * ratings2.getOrElse(film, 0)
    }.sum
    // Denominator: product of the two vector lengths.
    val denominator = vectorLength(ratings1) * vectorLength(ratings2)
    // Guard against 0/0, which would otherwise yield NaN.
    if (denominator == 0.0) 0.0 else dotProduct / denominator
  }

  /** Looks up a user's ratings, failing with a message that names the missing user. */
  private def ratingsOf(user: String): Map[String, Int] =
    source.getOrElse(user, throw new NoSuchElementException("no ratings stored for user: " + user))

  /** Euclidean length of a rating vector; 0.0 for an empty one. */
  private def vectorLength(ratings: Map[String, Int]): Double =
    math.sqrt(ratings.valuesIterator.map(score => score.toDouble * score).sum)

  /**
   * Populates the rating table, prints the similarity of every ordered pair of
   * users (one group of lines per user, groups separated by a dashed line) and
   * then stops the Spark context.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Initialise the ratings.
    getSource()
    try {
      // Bring the names back to the driver before printing: RDD.foreach runs its
      // closure inside Spark tasks, which only happens to share this JVM (and
      // therefore `source` and stdout) because the master is local.
      val names = users.collect()
      val separator = "--------------------------------------------------------------------------"
      names.zipWithIndex.foreach { case (name, index) =>
        if (index > 0) println(separator)
        names.foreach { other =>
          println(name + " 相对于 " + other + " 的相似性分数是 " + getCollaborateSource(name, other))
        }
      }
    } finally {
      // Release the Spark context even if printing fails.
      sc.stop()
    }
  }
}
运行结果: "C:Program FilesJavajdk1.8.0_77binjava" -Didea.launcher.port=7534 "-Didea.launcher.bin.path=D:Program Files (x86)JetBrainsIntelliJ IDEA 15.0.5bin" -Dfile.encoding=UTF-8 -classpath "C:Program FilesJavajdk1.8.0_77jrelibcharsets.jar;C:Program FilesJavajdk1.8.0_77jrelibdeploy.jar;C:Program FilesJavajdk1.8.0_77jrelibextaccess-bridge-64.jar;C:Program FilesJavajdk1.8.0_77jrelibextcldrdata.jar;C:Program FilesJavajdk1.8.0_77jrelibextdnsns.jar;C:Program FilesJavajdk1.8.0_77jrelibextjaccess.jar;C:Program FilesJavajdk1.8.0_77jrelibextjfxrt.jar;C:Program FilesJavajdk1.8.0_77jrelibextlocaledata.jar;C:Program FilesJavajdk1.8.0_77jrelibextnashorn.jar;C:Program FilesJavajdk1.8.0_77jrelibextsunec.jar;C:Program FilesJavajdk1.8.0_77jrelibextsunjce_provider.jar;C:Program FilesJavajdk1.8.0_77jrelibextsunmscapi.jar;C:Program FilesJavajdk1.8.0_77jrelibextsunpkcs11.jar;C:Program FilesJavajdk1.8.0_77jrelibextzipfs.jar;C:Program FilesJavajdk1.8.0_77jrelibjavaws.jar;C:Program FilesJavajdk1.8.0_77jrelibjce.jar;C:Program FilesJavajdk1.8.0_77jrelibjfr.jar;C:Program FilesJavajdk1.8.0_77jrelibjfxswt.jar;C:Program FilesJavajdk1.8.0_77jrelibjsse.jar;C:Program FilesJavajdk1.8.0_77jrelibmanagement-agent.jar;C:Program FilesJavajdk1.8.0_77jrelibplugin.jar;C:Program FilesJavajdk1.8.0_77jrelibresources.jar;C:Program FilesJavajdk1.8.0_77jrelibrt.jar;G:locationspark-mlliboutproductionspark-mllib;C:Program Files (x86)scalalibscala-actors-migration.jar;C:Program Files (x86)scalalibscala-actors.jar;C:Program Files (x86)scalalibscala-library.jar;C:Program Files (x86)scalalibscala-reflect.jar;C:Program Files 
(x86)scalalibscala-swing.jar;G:homedownloadspark-1.6.1-bin-hadoop2.6libdatanucleus-api-jdo-3.2.6.jar;G:homedownloadspark-1.6.1-bin-hadoop2.6libdatanucleus-core-3.2.10.jar;G:homedownloadspark-1.6.1-bin-hadoop2.6libdatanucleus-rdbms-3.2.9.jar;G:homedownloadspark-1.6.1-bin-hadoop2.6libspark-1.6.1-yarn-shuffle.jar;G:homedownloadspark-1.6.1-bin-hadoop2.6libspark-assembly-1.6.1-hadoop2.6.0.jar;G:homedownloadspark-1.6.1-bin-hadoop2.6libspark-examples-1.6.1-hadoop2.6.0.jar;D:Program Files (x86)JetBrainsIntelliJ IDEA 15.0.5libidea_rt.jar" com.intellij.rt.execution.application.AppMain mllib.CollaborativeFilteringSpark
CollaborativeFilteringSpark
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/G:/home/download/spark-1.6.1-bin-hadoop2.6/lib/spark-assembly-1.6.1-hadoop2.6.0.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/G:/home/download/spark-1.6.1-bin-hadoop2.6/lib/spark-examples-1.6.1-hadoop2.6.0.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
16/08/04 20:41:49 INFO Slf4jLogger: Slf4jLogger started
16/08/04 20:41:49 INFO Remoting: Starting remoting
16/08/04 20:41:49 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://sparkDriverActorSystem@192.168.1.100:8380]
张三 相对于 张三 的相似性分数是 0.9999999999999999
张三 相对于 李四 的相似性分数是 0.7089175569585667
张三 相对于 王五 的相似性分数是 0.6055300708194983
张三 相对于 赵六 的相似性分数是 0.564932682866032
张三 相对于 阿七 的相似性分数是 0.8981462390204985
--------------------------------------------------------------------------
李四 相对于 张三 的相似性分数是 0.7089175569585667
李四 相对于 李四 的相似性分数是 1.0000000000000002
李四 相对于 王五 的相似性分数是 0.8780541105074453
李四 相对于 赵六 的相似性分数是 0.6865554812287477
李四 相对于 阿七 的相似性分数是 0.6821910402406466
--------------------------------------------------------------------------
王五 相对于 张三 的相似性分数是 0.6055300708194983
王五 相对于 李四 的相似性分数是 0.8780541105074453
王五 相对于 王五 的相似性分数是 1.0
王五 相对于 赵六 的相似性分数是 0.7774630169639036
王五 相对于 阿七 的相似性分数是 0.7416198487095662
--------------------------------------------------------------------------
赵六 相对于 张三 的相似性分数是 0.564932682866032
赵六 相对于 李四 的相似性分数是 0.6865554812287477
赵六 相对于 王五 的相似性分数是 0.7774630169639036
赵六 相对于 赵六 的相似性分数是 1.0
赵六 相对于 阿七 的相似性分数是 0.738024966423108
--------------------------------------------------------------------------
阿七 相对于 张三 的相似性分数是 0.8981462390204985
阿七 相对于 李四 的相似性分数是 0.6821910402406466
阿七 相对于 王五 的相似性分数是 0.7416198487095662
阿七 相对于 赵六 的相似性分数是 0.738024966423108
阿七 相对于 阿七 的相似性分数是 0.9999999999999998
16/08/04 20:41:51 INFO RemoteActorRefProvider$RemotingTerminator: Shutting down remote daemon.
Process finished with exit code 0
|
发表评论 取消回复