# -*- coding: utf-8 -*-
# @Date    : 2019-02-14
# @Author  : Peng Shiyu
"""User-based collaborative filtering demo.

Each user is a ``{book title: rating}`` dict. The ratings are vectorized,
the pairwise cosine similarity between users is computed, and every user is
offered the books that their most similar users rated but they have not.
"""

import numpy as np

# Sample data: one {book title: rating} dict per user.
# Titles in use: "红楼梦", "西游记", "水浒传", "三国演义"
user1 = {"红楼梦": 4, "西游记": 3}
user2 = {"红楼梦": 5, "西游记": 6, "水浒传": 3}
user3 = {"红楼梦": 4, "西游记": 3, "三国演义": 5}
user4 = {"西游记": 4, "三国演义": 5}

data = [
    user1,
    user2,
    user3,
    user4,
]


def most_similar_users(similarities, user_id, top_n=2):
    """Return the indexes of the ``top_n`` users most similar to ``user_id``.

    :param similarities: similarity of ``user_id`` to every user (one row of
        the user-user similarity matrix), indexed by user id.
    :param user_id: index of the user the row belongs to; never returned.
    :param top_n: maximum number of neighbours to return.
    :return: list of user indexes, most similar first.
    """
    row = np.asarray(similarities, dtype=float)
    # Sort on the negated values with a stable sort: descending similarity,
    # ties resolved in favour of the lower user index.
    order = np.argsort(-row, kind="stable")
    # Drop the user explicitly instead of assuming they sort first; a tie
    # with another user would otherwise let them be their own neighbour.
    return [int(i) for i in order if i != user_id][:top_n]


def suggest_books(ratings, similarities, user_id, top_n=2):
    """Score the books ``user_id`` has not rated yet.

    A book's score is the sum of ``similarity * rating`` over those of the
    ``top_n`` most similar users who rated it.

    :param ratings: list of ``{book: rating}`` dicts, one per user.
    :param similarities: similarity of ``user_id`` to every user, indexed by
        user id.
    :param user_id: index into ``ratings`` of the user to recommend for.
    :param top_n: number of neighbours whose ratings are considered.
    :return: ``{book: score}`` for books missing from ``ratings[user_id]``.
    """
    row = np.asarray(similarities, dtype=float)
    already_rated = ratings[user_id]
    suggest = {}
    for neighbour in most_similar_users(row, user_id, top_n):
        # Weight by the similarity of *this* neighbour: index the row with
        # the neighbour's user id, not with its rank in the neighbour list.
        weight = float(row[neighbour])
        for book, rating in ratings[neighbour].items():
            if book in already_rated:
                continue
            # Accumulate so a later, less similar neighbour adds to the
            # score rather than replacing it.
            suggest[book] = suggest.get(book, 0.0) + weight * rating
    return suggest


def main():
    """Run the demo on ``data`` and print every intermediate result."""
    # scikit-learn is imported here so the pure-Python helpers above stay
    # importable without it.
    from sklearn.feature_extraction import DictVectorizer
    from sklearn.metrics.pairwise import cosine_similarity

    # Feature extraction: one row per user, one column per book.
    dict_vectorizer = DictVectorizer(dtype=np.int32, sparse=False)
    result = dict_vectorizer.fit_transform(data)

    # get_feature_names() was removed in scikit-learn 1.2; prefer the
    # replacement and fall back only on old versions.
    if hasattr(dict_vectorizer, "get_feature_names_out"):
        books = dict_vectorizer.get_feature_names_out().tolist()
    else:
        books = dict_vectorizer.get_feature_names()
    print(books)
    print(result)

    # User-user cosine similarity matrix.
    user_similarity = cosine_similarity(result)
    print(user_similarity)

    for user_id in range(len(data)):
        row = user_similarity[user_id]

        # The two users most similar to this one.
        max_index = most_similar_users(row, user_id)
        max_similar = [float(row[i]) for i in max_index]
        print(max_similar)
        print(max_index)

        print(suggest_books(data, row, user_id))


if __name__ == "__main__":
    main()

"""
Sample output (suggestion scores rounded to 4 decimal places):

['三国演义', '水浒传', '红楼梦', '西游记']
[[0 0 4 3]
 [0 3 5 6]
 [5 0 4 3]
 [5 0 0 4]]
[[1.         0.90837374 0.70710678 0.37481703]
 [0.90837374 1.         0.64231723 0.44799204]
 [0.70710678 0.64231723 1.         0.81719329]
 [0.37481703 0.44799204 0.81719329 1.        ]]
[0.9083737430941391, 0.7071067811865475]
[1, 2]
{'水浒传': 2.7251, '三国演义': 3.5355}
[0.9083737430941391, 0.6423172335936725]
[0, 2]
{'三国演义': 3.2116}
[0.8171932929538644, 0.7071067811865475]
[3, 0]
{}
[0.8171932929538644, 0.44799203576793445]
[2, 1]
{'红楼梦': 5.5087, '水浒传': 1.3440}
"""