我是靠谱客的博主“认真期待”。这篇文章是我在最近的开发中收集整理的,主要介绍 Mahout 的使用(一),觉得挺不错的,现在分享给大家,希望可以做个参考。

概述

1.HelloMahout.java
2.DistanceTest.java
3.MahoutDemo.java

 

 

1.HelloMahout.java

 1 package cn.crxy.mahout;
 2
 3 import java.io.File;
 4 import java.util.List;
 5
 6 import org.apache.log4j.Logger;
 7 import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
 8 import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
 9 import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
10 import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
11 import org.apache.mahout.cf.taste.model.DataModel;
12 import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
13 import org.apache.mahout.cf.taste.recommender.RecommendedItem;
14 import org.apache.mahout.cf.taste.recommender.Recommender;
15 import org.apache.mahout.cf.taste.similarity.UserSimilarity;
16
17 public class HelloMahout {
18
19 
public static void main(String[] args) {
20
21
Logger logger=Logger.getLogger(HelloMahout.class);
22
try {
23
//读取用户评分数据
封装成一个model
24
DataModel model = new FileDataModel(new File("F:\360Downloads\超人学院\第14期视频\2016-09-12【mahout】\样本数据\info.csv"));
25
// 根据相似度找出对应的好朋友的标准
物以类聚,人以群分
26
UserSimilarity userSimilarity = new PearsonCorrelationSimilarity(model);
27
// 邻域 选择两个好朋友帮我推荐
28
UserNeighborhood userNeighborhood = new NearestNUserNeighborhood(2,userSimilarity, model);
29
// 构建推荐引擎
30
Recommender recommender = new GenericUserBasedRecommender(model,userNeighborhood, userSimilarity);
31
// 进行推荐
32
List<RecommendedItem> recommend = recommender.recommend(1, 5);
33
for (RecommendedItem item : recommend) {
34 
logger.info(item);
35 
}
36 
} catch (Exception e) {
37 
logger.error(e.getMessage());
38 
}
39 
}
40 }
View Code

 

 

2.DistanceTest.java

 1 package cn.crxy.mahout;
 2
 3 import org.junit.Before;
 4 import org.junit.Test;
 5
 6 public class DistanceTest {
 7
 8
//
水果维度依次为:苹果、梨、桃子、栗子、香蕉、橘子
 9
//
小明:5,4,2,1,5,5
10
//
小丽:5,3,1,2,1,1
11
//
小王:5,3,4,1,4,3
12 
private int[] a;
13 
private int[] b;
14 
private int[] c;
15
16 
@Before
17 
public void initData(){
18
a=new int[]{5,4,2,1,5,5};
19
b=new int[]{5,3,1,2,1,1};
20
c=new int[]{5,3,4,1,4,3};
21 
}
22
23 
@Test
24 
public void Distance(){
25 //
a-b:5.916079783099616
26 //
a-c:3.1622776601683795
27 //
c-b:4.795831523312719
28
29
System.out.println(String.format("a-b:%s", 1.0/(1.0+Man(a, b))));
30
System.out.println(String.format("a-c:%s", 1.0/(1.0+Man(a, c))));
31
System.out.println(String.format("c-b:%s", 1.0/(1.0+Man(c, b))));
32 //
a-b:0.08333333333333333
33 //
a-c:0.14285714285714285
34 //
c-b:0.1
35
36 
}
37
//欧式距离
38 
private double ErluD(int[] a_array,int[] b_array){
39
double result=0;
40
for (int i = 0; i < a_array.length; i++) {
41
result+=Math.pow(a_array[i]-b_array[i],2);
42 
}
43
return Math.sqrt(result);
44 
}
45
//曼哈顿距离
46 
private double Man(int[] a_array,int[] b_array){
47
double result=0;
48
for (int i = 0; i < a_array.length; i++) {
49
result+=Math.abs(a_array[i]-b_array[i]);
50 
}
51
return result;
52 
}
53
//min式距离
54 
private double Min(int[] a_array,int[] b_array,int p){
55
double result=0;
56
for (int i = 0; i < a_array.length; i++) {
57
result+=Math.pow(Math.abs(a_array[i]-b_array[i]),p);
58 
}
59
return Math.pow(result,1.0/p);
60 
}
61
62 }
View Code

 

 

3.MahoutDemo.java


1 package cn.crxy.mahout;

2

3 import java.io.File;

4 import java.util.List;

5

6 import org.apache.mahout.cf.taste.impl.common.FastByIDMap;

7 import org.apache.mahout.cf.taste.impl.common.FastIDSet;

8 import org.apache.mahout.cf.taste.impl.model.GenericPreference;

9 import org.apache.mahout.cf.taste.impl.model.GenericUserPreferenceArray;
 10 import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
 11 import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
 12 import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
 13 import org.apache.mahout.cf.taste.impl.recommender.GenericBooleanPrefItemBasedRecommender;
 14 import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
 15 import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
 16 import org.apache.mahout.cf.taste.impl.similarity.CachingItemSimilarity;
 17 import org.apache.mahout.cf.taste.impl.similarity.CachingUserSimilarity;
 18 import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
 19 import org.apache.mahout.cf.taste.impl.similarity.TanimotoCoefficientSimilarity;
 20 import org.apache.mahout.cf.taste.model.DataModel;
 21 import org.apache.mahout.cf.taste.model.PreferenceArray;
 22 import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
 23 import org.apache.mahout.cf.taste.recommender.RecommendedItem;
 24 import org.apache.mahout.cf.taste.recommender.Recommender;
 25 import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
 26 import org.apache.mahout.cf.taste.similarity.UserSimilarity;
 27 import org.junit.Before;
 28 import org.junit.Ignore;
 29 import org.junit.Test;
 30
 31 public class MahoutDemo {
 32
 33
//组装datamodel
 34
 35
// userid itemid score
 36
// 101 102 103 104
 37
// 1(5,4,2,)
 38
// 2(,2,4,1)
 39
// 3(4,3,1,)
 40 
DataModel dataModel;
 41
 42 
@Before
 43 
public void initData() throws Exception{
 44
//每一个用户的喜好列表 key:用户id
value:该用户的偏好列表
 45
FastByIDMap<PreferenceArray> data=new FastByIDMap<PreferenceArray>();
 46
//组装第一个用户 偏好列表
 47
PreferenceArray array1=new GenericUserPreferenceArray(3);
 48
//PreferenceArray index 指:偏好列表的index 序号。
 49
array1.setUserID(0, 1);
 50
array1.setItemID(0, 101);
 51
array1.setValue(0, 5);
 52
 53
array1.setUserID(1, 1);
 54
array1.setItemID(1, 102);
 55
array1.setValue(1, 4);
 56
 57
array1.setUserID(2, 1);
 58
array1.setItemID(2, 103);
 59
array1.setValue(2, 2);
 60
 61
data.put(1, array1);
 62
 63
//组装第二个喜好
 64
PreferenceArray array2=new GenericUserPreferenceArray(3);
 65
//2(,2,4,1)
 66
array2.set(0, new GenericPreference(2,102,2));
 67
array2.set(1, new GenericPreference(2,103,4));
 68
array2.set(2, new GenericPreference(2,104,1));
 69
data.put(2, array2);
 70
//组装第三个喜好
 71
PreferenceArray array3=new GenericUserPreferenceArray(3);
 72
//3(4,3,1,)
 73
array3.set(0, new GenericPreference(3,101,4));
 74
array3.set(1, new GenericPreference(3,102,3));
 75
array3.set(2, new GenericPreference(3,103,1));
 76
data.put(3, array3);
 77
 78
//dataModel=new GenericDataModel(data);
 79 //
dataModel=new GenericBooleanPrefDataModel(userData);
 80 //
System.out.println(dataModel.getPreferenceValue(1, 102));//获得1用户对102的评分
 81 //
System.out.println(dataModel.getItemIDsFromUser(1));
 82 //
System.out.println(dataModel.getUserIDs());
 83
 84
 85
//1
101 102 103
 86
//2 102 103
 87
// key为userid value:物品的集合 set
 88
FastByIDMap<FastIDSet> userData=new FastByIDMap<FastIDSet>();
 89
 90
FastIDSet userSet1=new FastIDSet(3);
 91
userSet1.add(101);
 92
userSet1.add(102);
 93
userSet1.add(103);
 94
userData.put(1,userSet1);
 95
 96
FastIDSet userSet2=new FastIDSet(2);
 97
userSet2.add(102);
 98
userSet2.add(103);
 99
userData.put(2,userSet2);
100
101
102
//无偏好的构建
103 //
dataModel=new GenericBooleanPrefDataModel(userData);
104
105
106
//读取文件 有偏好的
107
dataModel=new FileDataModel(new File("F:\360Downloads\超人学院\第14期视频\2016-09-12【mahout】\样本数据\info.csv"));
108
//读取文件 无偏好的 无偏好的数据只有用户和其关联的商品 没有对应商品的评分
109 //
dataModel=new FileDataModel(new File("F:\360Downloads\超人学院\第14期视频\2016-09-12【mahout】\样本数据\ubool.data"));
110
111
112 //
对于无偏好数据:getvalue:如果存在记录则是1.0;否则为null。
113 //
System.out.println(dataModel.getPreferenceValue(1, 103));
114 //
System.out.println(dataModel.getItemIDsFromUser(1));
115 //
System.out.println(dataModel.getUserIDs());
116
117 
}
118 
@Ignore
119 
public void testUserSimi() throws Exception{
120
121
//利用model和相似度函数 计算用户相似度
122 //
UserSimilarity userSimilarity=new TanimotoCoefficientSimilarity(dataModel);
123
UserSimilarity userSimilarity=new PearsonCorrelationSimilarity(dataModel);
124
userSimilarity=new CachingUserSimilarity(userSimilarity, dataModel);
125
//查询用户之间的相似度
0.9999999999999998
0.944911182523068
126
//如果使用CachingUserSimilarity userSimilarity(1,5) 第二次不会再次计算了
127
System.out.println(userSimilarity.userSimilarity(1, 5));
128
System.out.println(userSimilarity.userSimilarity(1, 5));
129 
}
130 
@Ignore
131 
public void testItemSimi() throws Exception{
132
133
//利用model和相似度函数 计算物品相似度
134
ItemSimilarity itemSimilarity=new PearsonCorrelationSimilarity(dataModel);
135
itemSimilarity =new CachingItemSimilarity(itemSimilarity,dataModel);
136
//查询物品之间的相似度 0.9449111825230729
137
System.out.println(itemSimilarity.itemSimilarity(101, 102));
138 
}
139 
@Test
140 
public void testuserNeighborhood() throws Exception{
141
//相似度
有相似度才能算邻居是谁
142
UserSimilarity userSimilarity=new PearsonCorrelationSimilarity(dataModel);
143
//1.固定数目的邻居
如果取邻居 只取前三个
144
UserNeighborhood userNeighborhood=new NearestNUserNeighborhood(3,userSimilarity,dataModel);
145
long[] userNeighborhoods = userNeighborhood.getUserNeighborhood(1);//为1用户取得用户
146
for (long l : userNeighborhoods) {
147
System.out.println(l+"NearestNUserNeighborhoodsimi---"+userSimilarity.userSimilarity(1, l));
148 
}
149 //
4NearestNUserNeighborhoodsimi---0.9999999999999998
150 //
5NearestNUserNeighborhoodsimi---0.944911182523068
151 //
2NearestNUserNeighborhoodsimi---
-0.7642652566278799这个是负0.7
152
153
154
//2.固定阈值的邻居
只要0.8以上的
155
userNeighborhood=new ThresholdUserNeighborhood(0.7,userSimilarity,dataModel);
156
long[] userNeighborhoodsnew = userNeighborhood.getUserNeighborhood(1);
157
System.out.println(userSimilarity.userSimilarity(1, 2)); //查看1和2的相似度
158
for (long l : userNeighborhoodsnew) {
159
System.out.println(l+"ThresholdUserNeighborhoodsimi---"+userSimilarity.userSimilarity(1, l));
160 
}
161
162 
}
163 
@Test
164 
public void testItemCmd() throws Exception{
165
//1.基于物品的有偏好的推荐
基于物品的不需要邻居
166 //
ItemSimilarity itemSimilarity=new PearsonCorrelationSimilarity(dataModel);
167 //
Recommender recommender=new GenericItemBasedRecommender(dataModel,itemSimilarity);
168
169
//2.基于物品的无偏好推荐
170
ItemSimilarity itemSimilarity=new TanimotoCoefficientSimilarity(dataModel);
171
Recommender recommender=new GenericBooleanPrefItemBasedRecommender(dataModel,itemSimilarity);
172
173
174
List<RecommendedItem> recommend = recommender.recommend(1, 3);//给用户1推荐3个.
175
for (RecommendedItem recommendedItem : recommend) {
176 
System.out.println(recommendedItem);
177
//1.基于物品的有偏好的推荐RecommendedItem[item:104, value:5.0]其他的推荐不出来了....所以只推荐出了1个
178
179
//2.基于物品的无偏好的推荐
180
//RecommendedItem[item:104, value:1.8]
181
//RecommendedItem[item:106, value:1.15]
182
//RecommendedItem[item:105, value:0.85]
183 
}
184 
}
185 
@Test
186 
public void testUserCmd() throws Exception{
187
//1.基于用户的有偏好的推荐
188
//UserSimilarity userSimilarity=new PearsonCorrelationSimilarity(dataModel);
189
//2.基于用户的无偏好的推荐
190
UserSimilarity userSimilarity=new TanimotoCoefficientSimilarity(dataModel);
191
192
UserNeighborhood userNeighborhood=new NearestNUserNeighborhood(3,userSimilarity,dataModel);//Top 3
193
//构建推荐对象
194
Recommender recommender=new GenericUserBasedRecommender(dataModel,userNeighborhood,userSimilarity);
195
List<RecommendedItem> recommend = recommender.recommend(1, 3);
196
for (RecommendedItem recommendedItem : recommend) {
197 
System.out.println(recommendedItem);
198
//1.基于用户的有偏好推荐
199
//RecommendedItem[item:104, value:5.0]
200
//RecommendedItem[item:106, value:4.0]
201
//2.基于用户的无偏好推荐
202
//RecommendedItem[item:106, value:4.0]
203
//RecommendedItem[item:104, value:3.2121212]
204
205 
}
206 
}
207
208
209 }
View Code

 

最后

以上就是“认真期待”为你收集整理的《Mahout使用(一)》的全部内容,希望这篇文章能够帮你解决在使用 Mahout 时遇到的程序开发问题。

如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。

本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
点赞(45)

评论列表共有 0 条评论

立即
投稿
返回
顶部