-
原来的算法模型没有考虑用户重复购买的物品。对于钢材行业,用户的需求比较稳定,经常会重复购买同一物品,所以增加此功能。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
|
a 模型训练中增加如下代码
// Buy items per user: one entry for every distinct (user, item) pair found in the buy events.
// The pairs are de-duplicated directly as tuples. The user and item ids are never joined
// into one delimited string and split again, so an empty id or an id that happens to
// contain the delimiter text cannot be turned into a bogus ("", "") entry.
JavaPairRDD<String, String> buyItemForUser = data.getBuyEvents()
        .mapToPair(new PairFunction<UserItemEvent, String, String>() {
            @Override
            public Tuple2<String, String> call(UserItemEvent buyEvent) throws Exception {
                return new Tuple2<String, String>(buyEvent.getUser(), buyEvent.getItem());
            }
        })
        .distinct();
b 增加预测算法
/**
 * Lists the items paired with the querying user in the model's user/buy-item pairs.
 *
 * <p>Every pair whose first element equals {@code query.getUserEntityId()} is turned
 * into an {@code ItemScore} for the pair's second element with the fixed score 10.
 *
 * @param model model whose {@code getUserBuyItem()} pairs are scanned
 * @param query query that supplies the user entity id to match
 * @return the matching item scores, gathered into a list with {@code collect()}
 */
private List<ItemScore> buyItemForUser(Model model, Query query) {
    logger.info("start to add buy item for the user");

    final JavaRDD<ItemScore> boughtByUser = model.getUserBuyItem()
            // keep only the pairs that belong to the querying user
            .filter(pair -> pair._1().equals(query.getUserEntityId()))
            // the second element of each pair is the item id
            .map(pair -> new ItemScore(pair._2(), 10));

    return boughtByUser.collect();
}
c 在 topItemsForUser 中,按照你的业务逻辑处理两者的排序规则
|
基于物品的相似性进行推荐,也是我们期望的功能,加入以下代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
|
/**
 * Looks up the category sets of the items targeted by the user's events.
 *
 * <p>Events are read with {@code LJavaEventStore.findByEntity} for entity type
 * {@code "user"}, the event names from {@code ap.getSimilarItemEvents()} and target
 * entity type {@code "item"}. NOTE(review): the trailing arguments (10, true, 10 seconds)
 * are presumably the event limit, the "latest first" flag and the read timeout - confirm
 * against the event store API.
 *
 * <p>An event contributes a category set only when its target item is present in the
 * model's item index, is present in the model's item map, and has a non-empty category
 * set. Events that fail any of these checks are skipped instead of failing the whole
 * query.
 *
 * @param query query that supplies the user entity id
 * @param model model that supplies the item index and the item map
 * @return the category sets found, in event order; empty when none qualify, never null
 * @throws RuntimeException wrapping any exception raised while reading or resolving events
 */
private List<Set<String>> getRecentProductCategory(Query query, Model model) {
    try {
        List<Set<String>> result = new ArrayList<>();

        List<Event> events = LJavaEventStore.findByEntity(
                ap.getAppName(),
                "user",
                query.getUserEntityId(),
                OptionHelper.<String>none(),
                OptionHelper.some(ap.getSimilarItemEvents()),
                OptionHelper.some(OptionHelper.some("item")),
                OptionHelper.<Option<String>>none(),
                OptionHelper.<DateTime>none(),
                OptionHelper.<DateTime>none(),
                OptionHelper.some(10),
                true,
                Duration.apply(10, TimeUnit.SECONDS));

        for (final Event event : events) {
            if (!event.targetEntityId().isDefined()) {
                continue;
            }
            final String itemId = event.targetEntityId().get();

            // Only items known to the index are considered. lookup() returns an empty
            // list for an unknown id, whereas first() on an empty RDD throws.
            if (model.getItemIndex().lookup(itemId).isEmpty()) {
                continue;
            }

            // The item map may lack an entry even when the index has one.
            Item item = model.getItems().get(itemId);
            if (item == null) {
                continue;
            }

            Set<String> categories = item.getCategories();
            if (categories != null && !categories.isEmpty()) {
                result.add(categories);
            }
        }
        return result;
    } catch (Exception e) {
        // Pass the exception to the logger so the stack trace is not lost.
        logger.error("Error reading recent events for user " + query.getUserEntityId(), e);
        throw new RuntimeException(e.getMessage(), e);
    }
}
/**
 * Similarity comparison: scores every indexed item against the given category sets.
 *
 * <p>For each id in the model's item index, the score is the sum of
 * {@code getDistance(categorySet, itemCategories)} over all sets in {@code category}.
 * An id that has no entry in the item map is scored with a null category set (which
 * {@code getDistance} is called with as-is) rather than raising a
 * {@code NullPointerException} inside the Spark task. The scores are then passed through
 * {@code validScores} and {@code sortAndTake}.
 *
 * @param category category sets to compare against; may be null or empty
 * @param model    model that supplies the item map and the item index
 * @param query    query that supplies the white/black lists, categories, user id and
 *                 the number of results requested
 * @return the result of {@code sortAndTake}, or {@code null} when the model has no
 *         items or {@code category} is null or empty
 */
private List<ItemScore> similarItemsByCategory(
        final List<Set<String>> category, Model model, Query query) {
    final Map<String, Item> items = model.getItems();
    if (items == null || items.isEmpty()) {
        return null;
    }
    if (category == null || category.isEmpty()) {
        return null;
    }

    JavaRDD<ItemScore> itemScores = model.getItemIndex().map(
            new Function<Tuple2<String, Integer>, ItemScore>() {
                @Override
                public ItemScore call(Tuple2<String, Integer> idItem) throws Exception {
                    String itemId = idItem._1();
                    Item item = items.get(itemId);
                    // Guard against an indexed id that is missing from the item map.
                    Set<String> itemCategories = (item == null) ? null : item.getCategories();

                    double similarity = 0.0;
                    for (Set<String> wanted : category) {
                        similarity += getDistance(wanted, itemCategories);
                    }
                    logger.info(itemId + "->" + similarity);
                    return new ItemScore(itemId, similarity);
                }
            });

    itemScores = validScores(itemScores, query.getWhitelist(), query.getBlacklist(),
            query.getCategories(), model.getItems(), query.getUserEntityId());
    return sortAndTake(itemScores, query.getNumber());
}
/**
 * Similarity measure (deliberately simple): the number of elements two sets share.
 *
 * <p>Returns 0 unless both sets are non-null, non-empty and of equal size; otherwise
 * returns how many elements of {@code t} are also contained in {@code s}. Neither
 * argument is modified.
 *
 * @param t first set; may be null
 * @param s second set; may be null
 * @return the size of the intersection, or 0 when the sets are not comparable
 */
public static int getDistance(Set<String> t, Set<String> s) {
    boolean comparable = t != null && s != null
            && !t.isEmpty() && !s.isEmpty()
            && t.size() == s.size();
    if (!comparable) {
        return 0;
    }

    // Membership is tested against a HashSet copy, so lookups rely on equals/hashCode
    // whatever Set implementation the caller passed in.
    Set<String> lookup = new HashSet<String>(s);
    int shared = 0;
    for (String element : t) {
        if (lookup.contains(element)) {
            shared++;
        }
    }
    return shared;
}
最后按照你的业务逻辑,加入相似的物品。
|
本文转自whk66668888 51CTO博客,原文链接:http://blog.51cto.com/12597095/1983709