概述
1.HelloMahout.java
2.DistanceTest.java
3.MahoutDemo.java
1.HelloMahout.java
package cn.crxy.mahout;

import java.io.File;
import java.util.List;

import org.apache.log4j.Logger;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;

/**
 * Smallest possible user-based recommender: load ratings from a file, pick the
 * two most similar users (Pearson correlation) and ask for up to five
 * recommendations for user 1.
 */
public class HelloMahout {

    private static final Logger LOGGER = Logger.getLogger(HelloMahout.class);

    /**
     * Default ratings file. Backslashes must be doubled inside a Java string
     * literal; a single backslash followed by a digit is an octal escape and a
     * backslash followed by any other character is a compile error.
     */
    private static final String DEFAULT_RATINGS_FILE =
            "F:\\360Downloads\\超人学院\\第14期视频\\2016-09-12【mahout】\\样本数据\\info.csv";

    /**
     * Runs the demo and logs each recommended item.
     *
     * @param args optional; {@code args[0]} overrides the default ratings file path
     */
    public static void main(String[] args) {
        String ratingsPath = (args != null && args.length > 0) ? args[0] : DEFAULT_RATINGS_FILE;
        try {
            // Wrap the user rating data in a DataModel.
            DataModel model = new FileDataModel(new File(ratingsPath));
            // Similarity measure used to decide which users count as "close".
            UserSimilarity userSimilarity = new PearsonCorrelationSimilarity(model);
            // Neighborhood: the 2 nearest users contribute to the recommendation.
            UserNeighborhood userNeighborhood =
                    new NearestNUserNeighborhood(2, userSimilarity, model);
            // Build the recommendation engine.
            Recommender recommender =
                    new GenericUserBasedRecommender(model, userNeighborhood, userSimilarity);
            // Ask for at most 5 items for user 1.
            List<RecommendedItem> recommendations = recommender.recommend(1, 5);
            for (RecommendedItem item : recommendations) {
                LOGGER.info(item);
            }
        } catch (Exception e) {
            // Pass the throwable so the stack trace is kept; getMessage() alone may be null.
            LOGGER.error("Recommendation failed for ratings file: " + ratingsPath, e);
        }
    }
}
2.DistanceTest.java
1 package cn.crxy.mahout; 2 3 import org.junit.Before; 4 import org.junit.Test; 5 6 public class DistanceTest { 7 8 // 水果维度依次为:苹果、梨、桃子、栗子、香蕉、橘子 9 // 小明:5,4,2,1,5,5 10 // 小丽:5,3,1,2,1,1 11 // 小王:5,3,4,1,4,3 12 private int[] a; 13 private int[] b; 14 private int[] c; 15 16 @Before 17 public void initData(){ 18 a=new int[]{5,4,2,1,5,5}; 19 b=new int[]{5,3,1,2,1,1}; 20 c=new int[]{5,3,4,1,4,3}; 21 } 22 23 @Test 24 public void Distance(){ 25 // a-b:5.916079783099616 26 // a-c:3.1622776601683795 27 // c-b:4.795831523312719 28 29 System.out.println(String.format("a-b:%s", 1.0/(1.0+Man(a, b)))); 30 System.out.println(String.format("a-c:%s", 1.0/(1.0+Man(a, c)))); 31 System.out.println(String.format("c-b:%s", 1.0/(1.0+Man(c, b)))); 32 // a-b:0.08333333333333333 33 // a-c:0.14285714285714285 34 // c-b:0.1 35 36 } 37 //欧式距离 38 private double ErluD(int[] a_array,int[] b_array){ 39 double result=0; 40 for (int i = 0; i < a_array.length; i++) { 41 result+=Math.pow(a_array[i]-b_array[i],2); 42 } 43 return Math.sqrt(result); 44 } 45 //曼哈顿距离 46 private double Man(int[] a_array,int[] b_array){ 47 double result=0; 48 for (int i = 0; i < a_array.length; i++) { 49 result+=Math.abs(a_array[i]-b_array[i]); 50 } 51 return result; 52 } 53 //min式距离 54 private double Min(int[] a_array,int[] b_array,int p){ 55 double result=0; 56 for (int i = 0; i < a_array.length; i++) { 57 result+=Math.pow(Math.abs(a_array[i]-b_array[i]),p); 58 } 59 return Math.pow(result,1.0/p); 60 } 61 62 }
3.MahoutDemo.java
package cn.crxy.mahout;

import java.io.File;
import java.util.List;

import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
import org.apache.mahout.cf.taste.impl.model.GenericPreference;
import org.apache.mahout.cf.taste.impl.model.GenericUserPreferenceArray;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericBooleanPrefItemBasedRecommender;
import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.CachingItemSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.CachingUserSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.TanimotoCoefficientSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;

/**
 * Walk-through of the Mahout Taste building blocks: data models, user and item
 * similarity, user neighborhoods, and item-based / user-based recommenders.
 *
 * <p>The "sample output" comments are the values recorded by the original
 * author against their own info.csv; they are not asserted here.
 */
public class MahoutDemo {

    /**
     * Ratings file with preference values. Backslashes are doubled because a
     * single backslash starts an escape sequence inside a Java string literal.
     */
    private static final String RATINGS_FILE =
            "F:\\360Downloads\\超人学院\\第14期视频\\2016-09-12【mahout】\\样本数据\\info.csv";

    // Layout of the in-memory example data (userId -> score per itemId):
    //      101 102 103 104
    //   1 ( 5,  4,  2,    )
    //   2 (  ,  2,  4,  1 )
    //   3 ( 4,  3,  1,    )
    DataModel dataModel;

    /**
     * Shows two ways of assembling model data in memory, then loads the model
     * actually used by the tests from {@link #RATINGS_FILE}.
     *
     * <p>The in-memory structures are illustrative only: the lines that would
     * turn them into a model are left commented out, as in the original demo.
     */
    @Before
    public void initData() throws Exception {
        // --- Illustration 1: data with preference values ---
        // key: user id, value: that user's preference list
        FastByIDMap<PreferenceArray> data = new FastByIDMap<PreferenceArray>();

        // First user, filled field by field. The first argument of each setter
        // is the index within the preference array.
        PreferenceArray array1 = new GenericUserPreferenceArray(3);
        array1.setUserID(0, 1);
        array1.setItemID(0, 101);
        array1.setValue(0, 5);

        array1.setUserID(1, 1);
        array1.setItemID(1, 102);
        array1.setValue(1, 4);

        array1.setUserID(2, 1);
        array1.setItemID(2, 103);
        array1.setValue(2, 2);

        data.put(1, array1);

        // Second user: 2( ,2,4,1), filled with whole GenericPreference objects.
        PreferenceArray array2 = new GenericUserPreferenceArray(3);
        array2.set(0, new GenericPreference(2, 102, 2));
        array2.set(1, new GenericPreference(2, 103, 4));
        array2.set(2, new GenericPreference(2, 104, 1));
        data.put(2, array2);

        // Third user: 3(4,3,1, )
        PreferenceArray array3 = new GenericUserPreferenceArray(3);
        array3.set(0, new GenericPreference(3, 101, 4));
        array3.set(1, new GenericPreference(3, 102, 3));
        array3.set(2, new GenericPreference(3, 103, 1));
        data.put(3, array3);

        // To use the data above:
        // dataModel = new GenericDataModel(data);
        // System.out.println(dataModel.getPreferenceValue(1, 102)); // user 1's score for item 102
        // System.out.println(dataModel.getItemIDsFromUser(1));
        // System.out.println(dataModel.getUserIDs());

        // --- Illustration 2: boolean data (no preference values) ---
        //   1 -> 101 102 103
        //   2 -> 102 103
        // key: user id, value: set of item ids
        FastByIDMap<FastIDSet> userData = new FastByIDMap<FastIDSet>();

        FastIDSet userSet1 = new FastIDSet(3);
        userSet1.add(101);
        userSet1.add(102);
        userSet1.add(103);
        userData.put(1, userSet1);

        FastIDSet userSet2 = new FastIDSet(2);
        userSet2.add(102);
        userSet2.add(103);
        userData.put(2, userSet2);

        // To use the boolean data above:
        // dataModel = new GenericBooleanPrefDataModel(userData);

        // --- Model actually used: read from file, with preference values ---
        dataModel = new FileDataModel(new File(RATINGS_FILE));

        // Alternative: a boolean file that lists only users and their items, no scores.
        // dataModel = new FileDataModel(new File(
        //         "F:\\360Downloads\\超人学院\\第14期视频\\2016-09-12【mahout】\\样本数据\\ubool.data"));

        // Author's note for boolean data: getPreferenceValue yields 1.0 when a
        // record exists and null otherwise.
        // System.out.println(dataModel.getPreferenceValue(1, 103));
        // System.out.println(dataModel.getItemIDsFromUser(1));
        // System.out.println(dataModel.getUserIDs());
    }

    /** Computes the similarity between users 1 and 5, twice, through a caching wrapper. */
    @Ignore // paired with @Test so the runner reports the method as ignored rather than dropping it
    @Test
    public void testUserSimi() throws Exception {
        // Alternative: UserSimilarity userSimilarity = new TanimotoCoefficientSimilarity(dataModel);
        UserSimilarity userSimilarity = new PearsonCorrelationSimilarity(dataModel);
        userSimilarity = new CachingUserSimilarity(userSimilarity, dataModel);
        // Sample output recorded by the author: 0.9999999999999998  0.944911182523068
        // Author's note: with CachingUserSimilarity the second lookup of (1, 5) is not recomputed.
        System.out.println(userSimilarity.userSimilarity(1, 5));
        System.out.println(userSimilarity.userSimilarity(1, 5));
    }

    /** Computes the similarity between items 101 and 102 through a caching wrapper. */
    @Ignore // paired with @Test so the runner reports the method as ignored rather than dropping it
    @Test
    public void testItemSimi() throws Exception {
        ItemSimilarity itemSimilarity = new PearsonCorrelationSimilarity(dataModel);
        itemSimilarity = new CachingItemSimilarity(itemSimilarity, dataModel);
        // Sample output recorded by the author: 0.9449111825230729
        System.out.println(itemSimilarity.itemSimilarity(101, 102));
    }

    /** Prints user 1's neighbors, first with a fixed-size and then with a threshold neighborhood. */
    @Test
    public void testuserNeighborhood() throws Exception {
        // A similarity measure is required before neighbors can be determined.
        UserSimilarity userSimilarity = new PearsonCorrelationSimilarity(dataModel);

        // 1. Fixed-size neighborhood: keep only the top 3 neighbors.
        UserNeighborhood userNeighborhood =
                new NearestNUserNeighborhood(3, userSimilarity, dataModel);
        long[] nearestNeighbors = userNeighborhood.getUserNeighborhood(1);
        for (long neighborId : nearestNeighbors) {
            System.out.println(neighborId + "NearestNUserNeighborhoodsimi---"
                    + userSimilarity.userSimilarity(1, neighborId));
        }
        // Sample output recorded by the author:
        //   4NearestNUserNeighborhoodsimi---0.9999999999999998
        //   5NearestNUserNeighborhoodsimi---0.944911182523068
        //   2NearestNUserNeighborhoodsimi----0.7642652566278799   (a negative value)

        // 2. Threshold neighborhood, constructed with a threshold of 0.7.
        userNeighborhood = new ThresholdUserNeighborhood(0.7, userSimilarity, dataModel);
        long[] thresholdNeighbors = userNeighborhood.getUserNeighborhood(1);
        System.out.println(userSimilarity.userSimilarity(1, 2)); // similarity of users 1 and 2
        for (long neighborId : thresholdNeighbors) {
            System.out.println(neighborId + "ThresholdUserNeighborhoodsimi---"
                    + userSimilarity.userSimilarity(1, neighborId));
        }
    }

    /** Prints up to 3 item-based recommendations for user 1. */
    @Test
    public void testItemCmd() throws Exception {
        // 1. Item-based with preference values (item-based needs no neighborhood):
        // ItemSimilarity itemSimilarity = new PearsonCorrelationSimilarity(dataModel);
        // Recommender recommender = new GenericItemBasedRecommender(dataModel, itemSimilarity);

        // 2. Item-based without preference values:
        ItemSimilarity itemSimilarity = new TanimotoCoefficientSimilarity(dataModel);
        Recommender recommender =
                new GenericBooleanPrefItemBasedRecommender(dataModel, itemSimilarity);

        // Sample output recorded by the author:
        //   variant 1: RecommendedItem[item:104, value:5.0]   (only one item could be recommended)
        //   variant 2: RecommendedItem[item:104, value:1.8]
        //              RecommendedItem[item:106, value:1.15]
        //              RecommendedItem[item:105, value:0.85]
        List<RecommendedItem> recommendations = recommender.recommend(1, 3);
        for (RecommendedItem recommendedItem : recommendations) {
            System.out.println(recommendedItem);
        }
    }

    /** Prints up to 3 user-based recommendations for user 1, using the top 3 neighbors. */
    @Test
    public void testUserCmd() throws Exception {
        // 1. User-based with preference values:
        // UserSimilarity userSimilarity = new PearsonCorrelationSimilarity(dataModel);
        // 2. User-based without preference values:
        UserSimilarity userSimilarity = new TanimotoCoefficientSimilarity(dataModel);

        UserNeighborhood userNeighborhood =
                new NearestNUserNeighborhood(3, userSimilarity, dataModel); // top 3
        Recommender recommender =
                new GenericUserBasedRecommender(dataModel, userNeighborhood, userSimilarity);

        // Sample output recorded by the author:
        //   variant 1: RecommendedItem[item:104, value:5.0]
        //              RecommendedItem[item:106, value:4.0]
        //   variant 2: RecommendedItem[item:106, value:4.0]
        //              RecommendedItem[item:104, value:3.2121212]
        List<RecommendedItem> recommendations = recommender.recommend(1, 3);
        for (RecommendedItem recommendedItem : recommendations) {
            System.out.println(recommendedItem);
        }
    }
}
最后
以上就是认真期待为你收集整理的Mahout使用(一)的全部内容,希望文章能够帮你解决Mahout使用(一)所遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复