1. 原来的算法模型没有考虑用户重复购买的物品。在钢材行业,用户的需求比较稳定,经常重复购买同一种物品,所以增加此功能。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
  a 模型训练中增加如下代码
  
         //buy item for users
         JavaPairRDD<String, String> buyItemForUser = data.getBuyEvents().mapToPair( new  PairFunction<UserItemEvent, Tuple2<String, String>, Integer>() {
             @Override
             public  Tuple2<Tuple2<String, String>, Integer> call(UserItemEvent buyEvent)  throws  Exception {
                 return   new  Tuple2<>( new  Tuple2<>(buyEvent.getUser(), buyEvent.getItem()),  1 );
             }
         }).mapToPair( new  PairFunction<Tuple2<Tuple2<String, String>, Integer>, String, Integer>() {
             @Override
             public  Tuple2<String, Integer> call(Tuple2<Tuple2<String, String>, Integer> element)  throws  Exception {
                 return  new  Tuple2<>(element._1()._1()+ ":::" +element._1()._2(), element._2());
             }
         }).reduceByKey( new  Function2<Integer, Integer, Integer>() {
             @Override
             public  Integer call(Integer integer, Integer integer2)  throws  Exception {
                 return  integer + integer2;
             }
         }).mapToPair( new  PairFunction<Tuple2<String, Integer>, String, String>() {
             @Override
             public  Tuple2<String, String> call(Tuple2<String, Integer> element)  throws  Exception {
                 String temp[]=element._1().split( ":::" );
                 if (temp.length== 2 ){
                     return  new  Tuple2<>(temp[ 0 ], temp[ 1 ]);
                 }            
                 return  new  Tuple2<>( "" "" );              
             }
         });
         
         b 增加预测算法
             private  List<ItemScore> buyItemForUser(Model model, Query query){
         logger.info( "start to add buy item for the user" );
         final  JavaRDD<ItemScore> matchedUser = model.getUserBuyItem().filter( new  Function<Tuple2<String, String>, Boolean>() {
             @Override
             public  Boolean call(Tuple2<String, String> userIndex)  throws  Exception {
                 return  userIndex._1().equals(query.getUserEntityId());
             }
         }).map( new  Function<Tuple2<String,String>,ItemScore>() {
             @Override
             public  ItemScore call(Tuple2<String, String> arg0)  throws  Exception {
                   return  new  ItemScore(arg0._2(), 10 );
             }            
         });
         
         return  matchedUser.collect();
         
     }
     
     c topItemsForUser 按照你的业务逻辑处理两者的排序规则


基于物品相似性的推荐也是我们期望的功能,加入以下代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
//根据事件查找物品的属性
     private  List<Set<String>> getRecentProductCategory(Query query, Model model) {
         try  {
             List<Set<String>> result =  new  ArrayList<>();
 
             List<Event> events = LJavaEventStore.findByEntity(
                     ap.getAppName(),
                     "user" ,
                     query.getUserEntityId(),
                     OptionHelper.<String>none(),
                     OptionHelper.some(ap.getSimilarItemEvents()),
                     OptionHelper.some(OptionHelper.some( "item" )),
                     OptionHelper.<Option<String>>none(),
                     OptionHelper.<DateTime>none(),
                     OptionHelper.<DateTime>none(),
                     OptionHelper.some( 10 ),
                     true ,
                     Duration.apply( 10 , TimeUnit.SECONDS));
 
             for  ( final  Event event : events) {
                 if  (event.targetEntityId().isDefined()) {
                     JavaPairRDD<String, Integer> filtered = model.getItemIndex().filter( new  Function<Tuple2<String, Integer>, Boolean>() {
                         @Override
                         public  Boolean call(Tuple2<String, Integer> element)  throws  Exception {
                             return  element._1().equals(event.targetEntityId().get());
                         }
                     });
 
                     final  String itemIndex = filtered.first()._1();
                     
                     
                     Item item = model.getItems().get(itemIndex);
                     if (item.getCategories()!= null  && item.getCategories().size()> 0 ){
                        result.add(item.getCategories());
                     }
                 }
             }
 
             return  result;
         catch  (Exception e) {
             logger.error( "Error reading recent events for user "  + query.getUserEntityId());
             throw  new  RuntimeException(e.getMessage(), e);
         }
     }
     
     //相似性比较
     private  List<ItemScore> similarItemsByCategory( final  List<Set<String>> category, Model model, Query query) {
         
         Map<String, Item> items =model.getItems();
         if (items== null  || items.size()== 0 ){
             return  null ;
         }
         
         if (category== null  || category.size()== 0 ){
             return  null ;
         }
         
         JavaRDD<ItemScore> itemScores = model.getItemIndex().map( new  Function<Tuple2<String, Integer>, ItemScore>() {
             @Override
             public  ItemScore call(Tuple2<String, Integer> idItem)  throws  Exception {
                 String itemid= idItem._1();
                 Item item = items.get(itemid);            
                 double  similarity =  0.0 ;
                 for ( int  i= 0  ; i<category.size(); i++){
                     similarity+=getDistance(category.get(i),item.getCategories());
                 }
                 logger.info(itemid+ "->" +similarity);
                 return  ( new  ItemScore(itemid, similarity));
                 
             }
         });
         
         itemScores = validScores(itemScores, query.getWhitelist(), query.getBlacklist(), query.getCategories(), model.getItems(), query.getUserEntityId());
         
         return  sortAndTake(itemScores, query.getNumber());
         
       /*  List<ItemScore> itemScores=new ArrayList<ItemScore>();
         
         for (Map.Entry<String, Item> entry : items.entrySet()) {
              Item it = entry.getValue();
              double similarity = 0.0;
              for(int i=0 ; i<category.size(); i++){
                  similarity+=getDistance(category.get(i),it.getCategories());
              }
              itemScores.add(new ItemScore(it.getEntityId(), similarity));
         }
         
         
         
         itemScores = validScoresForList(itemScores, query.getWhitelist(), query.getBlacklist(), query.getCategories(), model.getItems(), query.getUserEntityId());
         return sortAndTake(itemScores, query.getNumber());*/
     }
     
     //相似算法,比较简单
     public  static  int  getDistance(Set<String> t, Set<String> s) {  
         if  (t== null  || t.size()== 0  || s== null  || s.size()== 0  || t.size() != s.size()) {            
             return  0 ;
         }
         HashSet<String> t_temp= new  HashSet<String>(t); //必须转一下
         HashSet<String> s_temp= new  HashSet<String>(s);
         t_temp.retainAll(s_temp);
         return  t_temp.size();
         
     }
     
     最后按照你的业务逻辑,加入相似的物品。