这是基于用户的协同过滤算法的实现,但为什么总是报错:
Traceback (most recent call last):
File "D:/haha/user", line 165, in <module>
testUserBasedCF()
File "D:/haha/user", line 155, in testUserBasedCF
cf = UserBasedCF('u.data')
File "D:/haha/user", line 8, in __init__
self.readData()
File "D:/haha/user", line 17, in readData
userid,itemid,record = line.split(' ')
ValueError: need more than 1 value to unpack
具体代码如下:
import random import math class UserBasedCF: def __init__(self,datafile = None): self.datafile = datafile self.readData() self.splitData(3,47) def readData(self,datafile = None): """ read the data from the data file which is a data set """ self.datafile = datafile or self.datafile self.data = [] for line in open(self.datafile): userid,itemid,record = line.split(' ') self.data.append((userid,itemid,int(record))) def splitData(self,k,seed,data=None,M = 8): """ split the data set testdata is a test data set traindata is a train set test data set / train data set is 1:M-1 """ self.testdata = {} self.traindata = {} data = data or self.data random.seed(seed) for user,item, record in self.data: if random.randint(0,M) == k: self.testdata.setdefault(user,{}) self.testdata[user][item] = record else: self.traindata.setdefault(user,{}) self.traindata[user][item] = record def userSimilarity(self,train = None): """ One method of getting user similarity matrix """ train = train or self.traindata self.userSim = dict() for u in train.keys(): for v in train.keys(): if u == v: continue self.userSim.setdefault(u,{}) self.userSim[u][v] = len(set(train[u].keys()) & set(train[v].keys())) self.userSim[u][v] /= math.sqrt(len(train[u]) * len(train[v]) *1.0) def userSimilarityBest(self,train = None): """ the other method of getting user similarity which is better than above you can get the method on page 46 In this experiment,we use this method """ train = train or self.traindata self.userSimBest = dict() item_users = dict() for u,item in train.items(): for i in item.keys(): item_users.setdefault(i,set()) item_users[i].add(u) user_item_count = dict() count = dict() for item,users in item_users.items(): for u in users: user_item_count.setdefault(u,0) user_item_count[u] += 1 for v in users: if u == v:continue count.setdefault(u,{}) count[u].setdefault(v,0) count[u][v] += 1 for u ,related_users in count.items(): self.userSimBest.setdefault(u,dict()) for v, cuv in related_users.items(): self.userSimBest[u][v] = cuv / math.sqrt(user_item_count[u] * user_item_count[v] * 1.0) def recommend(self,user,train = None,k = 8,nitem = 40): train = train or self.traindata rank = dict() interacted_items = train.get(user,{}) for v ,wuv in sorted(self.userSimBest[user].items(),key = lambda x : x[1],reverse = True)[0:k]: for i , rvi in train[v].items(): if i in interacted_items: continue rank.setdefault(i,0) rank[i] += wuv return dict(sorted(rank.items(),key = lambda x :x[1],reverse = True)[0:nitem]) def recallAndPrecision(self,train = None,test = None,k = 8,nitem = 10): """ Get the recall and precision, the method you want to know is listed in the page 43 """ train = train or self.traindata test = test or self.testdata hit = 0 recall = 0 precision = 0 for user in train.keys(): tu = test.get(user,{}) rank = self.recommend(user, train = train,k = k,nitem = nitem) for item,_ in rank.items(): if item in tu: hit += 1 recall += len(tu) precision += nitem return (hit / (recall * 1.0),hit / (precision * 1.0)) def coverage(self,train = None,test = None,k = 8,nitem = 10): train = train or self.traindata test = test or self.testdata recommend_items = set() all_items = set() for user in train.keys(): for item in train[user].keys(): all_items.add(item) rank = self.recommend(user, train, k = k, nitem = nitem) for item,_ in rank.items(): recommend_items.add(item) return len(recommend_items) / (len(all_items) * 1.0) def popularity(self,train = None,test = None,k = 8,nitem = 10): """ Get the popularity the algorithm on page 44 """ train = train or self.traindata test = test or self.testdata item_popularity = dict() for user ,items in train.items(): for item in items.keys(): item_popularity.setdefault(item,0) item_popularity[item] += 1 ret = 0 n = 0 for user in train.keys(): rank = self.recommend(user, train, k = k, nitem = nitem) for item ,_ in rank.items(): ret += math.log(1+item_popularity[item]) n += 1 return ret / (n * 1.0) def testRecommend(): ubcf = UserBasedCF('u.data') ubcf.readData() ubcf.splitData(4,100) ubcf.userSimilarity() user = "345" rank = ubcf.recommend(user,k = 3) for i,rvi in rank.items(): items = ubcf.testdata.get(user,{}) record = items.get(i,0) print(i,rvi,record) def testUserBasedCF(): cf = UserBasedCF('u.data') cf.userSimilarityBest() print ('K',"recall",'precision','coverage','popularity') for k in [5,10,20,40,80,160]: recall,precision = cf.recallAndPrecision(k = k) coverage = cf.coverage(k = k) popularity = cf.popularity(k = k) print(k,recall * 100,precision * 100,coverage * 100,popularity) if __name__ == "__main__": testUserBasedCF()
利用PyCharm用于一般IDE具备的功能,可不少人在Linux系统中安装PyCharm时会出现错误,提示Error: cannot start PyCharm,这是什么错误呢?我们又该如何来解决呢?下面小编就教大家Linux安装PyCharm提示错误的解决方法,有兴趣的网友们可以一起来学习下。。
Error: cannot start PyCharm
No JDK found to run PyCharm. Please validate either PYCHARM_JDK, JDK_HOME or JAVA_HOME光纤激光切割机 http://www.6618cnc.comenvironment variable points to valid JDK installation.
解决方法:
进入jdk目录, cd 。。/。。/jdk1.7.0_60/
执行 export JAVA_HOME= 。。/。。/jdk1.7.0_60/(按各自电脑的jdk目录设置)
然后重新安装pycharm即可。
上面就是小编总结的Linux安装PyCharm提示错误的解决方法,如果你在安装的过程中出现了错误提示,按照本文介绍的方法进行重新安装即可。
估计你的 line.split()只返回了一个值,不能赋给左边的三个变量。
>>>'hello'.split('')
['hello']
>>>a,b,c='hello'.split('')ValueError:needmorethan1valuetounpack
是不是line没有值,控制台打出来看一下。
print(heloo)
版权声明:本文内容由阿里云实名注册用户自发贡献,版权归原作者所有,阿里云开发者社区不拥有其著作权,亦不承担相应法律责任。具体规则请查看《阿里云开发者社区用户服务协议》和《阿里云开发者社区知识产权保护指引》。如果您发现本社区中有涉嫌抄袭的内容,填写侵权投诉表单进行举报,一经查实,本社区将立刻删除涉嫌侵权内容。