经典推荐算法实现
目录:1. 基于ItemCF算法;2. 基于UserCF算法;3. 基于时间上下文的个性化推荐;4. 基于LFM算法的个性化推荐;5. 基于图的推荐算法。第1节(基于ItemCF算法)代码预览:`#!/usr/bin/env python`、`# -*- coding:utf-8 -*-`、`import math`、`# ItemCF算法`、`def ItemSimilarity(train): ...`
·
目录
1.基于ItemCF算法
#!/usr/bin/env python
# -*- coding:utf-8 -*-
import math
# ItemCF算法
def ItemSimilarity(train):
    """Compute ItemCF cosine similarity between items from co-occurrence.

    Args:
        train: Mapping ``user -> {item: value}``. Only the item keys are
            read; the values are ignored.

    Returns:
        Nested dict ``W`` where ``W[i][j] = C[i][j] / sqrt(N[i] * N[j])``,
        with ``C[i][j]`` the number of users holding both items and ``N[i]``
        the number of users holding item ``i``. Only ordered pairs of
        distinct items sharing at least one user get an entry.
    """
    C = dict()  # C[i][j]: users that have both i and j
    N = dict()  # N[i]: users that have i
    for u, items in train.items():
        for i in items:
            # Plain dicts have no implicit zero, so seed counters explicitly.
            N[i] = N.get(i, 0) + 1
            for j in items:
                if i == j:
                    continue
                row = C.setdefault(i, {})
                row[j] = row.get(j, 0) + 1
    W = dict()
    for i, related_items in C.items():
        W[i] = {}
        for j, cij in related_items.items():
            W[i][j] = cij / math.sqrt(N[i] * N[j])
    return W
# ItemCF-IUF算法
def ItemSimilarity_v2(train):
    """Compute ItemCF-IUF similarity (co-occurrence damped by user activity).

    Each co-occurrence contributed by a user is weighted by
    ``1 / log(1 + len(items))``, so users with long item lists contribute
    less to every pair they produce.

    Args:
        train: Mapping ``user -> {item: value}``. Only the item keys are
            read; the values are ignored.

    Returns:
        Nested dict ``W`` where ``W[i][j] = C[i][j] / sqrt(N[i] * N[j])``,
        with ``C`` the damped co-occurrence sums and ``N[i]`` the number of
        users holding item ``i``. Only ordered pairs of distinct items
        sharing at least one user get an entry.
    """
    C = dict()  # C[i][j]: damped co-occurrence weight of i and j
    N = dict()  # N[i]: users that have i
    for u, items in train.items():
        for i in items:
            # Plain dicts have no implicit zero, so seed counters explicitly.
            N[i] = N.get(i, 0) + 1
            for j in items:
                if i == j:
                    continue
                # This point is reached only when the user has at least two
                # items, so the logarithm below is strictly positive.
                row = C.setdefault(i, {})
                row[j] = row.get(j, 0) + 1 / math.log(1 + len(items) * 1.0)
    W = dict()
    for i, related_items in C.items():
        W[i] = {}
        for j, cij in related_items.items():
            W[i][j] = cij / math.sqrt(N[i] * N[j])
    return W
def Recommend(train, user_id, W, K):
    """Score unseen items for a user with item-based collaborative filtering.

    For every item ``i`` the user already has (with value ``pi``), the ``K``
    most similar items ``j`` are taken from ``W[i]`` and each unseen ``j``
    accumulates ``pi * W[i][j]``.

    Args:
        train: Mapping ``user -> {item: value}``.
        user_id: Key of the target user in ``train``.
        W: Item similarity, ``W[i][j]``.
        K: Number of nearest neighbours considered per item.

    Returns:
        Dict ``item -> score`` covering only items the user does not have.

    Raises:
        KeyError: If ``user_id`` is not present in ``train``.
    """
    # Imported locally because the file never imports itemgetter.
    from operator import itemgetter

    rank = dict()
    ru = train[user_id]
    for i, pi in ru.items():
        # An item that never co-occurred with another has no row in W.
        neighbours = sorted(W.get(i, {}).items(),
                            key=itemgetter(1), reverse=True)[0:K]
        for j, wj in neighbours:
            if j in ru:
                continue
            rank[j] = rank.get(j, 0) + pi * wj
    return rank
2.基于UserCF算法
import math
'''
基于UserCF的推荐算法
'''
# UserCF算法
def UserSimilarity(train):
    """Compute UserCF cosine similarity between users via an inverted index.

    Args:
        train: Mapping ``user -> {item: value}``. Only the item keys are
            read; the values are ignored.

    Returns:
        Nested dict ``W`` where ``W[u][v] = C[u][v] / sqrt(N[u] * N[v])``,
        with ``C[u][v]`` the number of items both users hold and ``N[u]`` the
        number of items user ``u`` holds. Only ordered pairs of distinct
        users sharing at least one item get an entry.
    """
    # Inverted index item -> users, so only users that actually share an
    # item are ever paired.
    item_users = dict()
    for u, items in train.items():
        for i in items:
            item_users.setdefault(i, set()).add(u)

    C = dict()  # C[u][v]: items shared by u and v
    N = dict()  # N[u]: items held by u
    for i, users in item_users.items():
        for u in users:
            # Plain dicts have no implicit zero, so seed counters explicitly.
            N[u] = N.get(u, 0) + 1
            for v in users:
                if u == v:
                    continue
                row = C.setdefault(u, {})
                row[v] = row.get(v, 0) + 1

    W = dict()
    for u, related_users in C.items():
        W[u] = {}
        for v, cuv in related_users.items():
            W[u][v] = cuv / math.sqrt(N[u] * N[v])
    return W
# User-IIF算法
def UserSimilarity_v2(train):
    """Compute User-IIF similarity (shared items damped by item popularity).

    Each shared item contributes ``1 / log(1 + len(users))`` where ``users``
    is the set of users holding that item, so widely held items count less.

    Args:
        train: Mapping ``user -> {item: value}``. Only the item keys are
            read; the values are ignored.

    Returns:
        Nested dict ``W`` where ``W[u][v] = C[u][v] / sqrt(N[u] * N[v])``,
        with ``C`` the damped shared-item sums and ``N[u]`` the number of
        items user ``u`` holds. Only ordered pairs of distinct users sharing
        at least one item get an entry.
    """
    # Inverted index item -> users.
    item_users = dict()
    for u, items in train.items():
        for i in items:
            item_users.setdefault(i, set()).add(u)

    C = dict()  # C[u][v]: damped weight of items shared by u and v
    N = dict()  # N[u]: items held by u
    for i, users in item_users.items():
        for u in users:
            # Plain dicts have no implicit zero, so seed counters explicitly.
            N[u] = N.get(u, 0) + 1
            for v in users:
                if u == v:
                    continue
                # Reached only when the item has at least two users, so the
                # logarithm below is strictly positive.
                row = C.setdefault(u, {})
                row[v] = row.get(v, 0) + 1 / math.log(1 + len(users))

    W = dict()
    for u, related_users in C.items():
        W[u] = {}
        for v, cuv in related_users.items():
            W[u][v] = cuv / math.sqrt(N[u] * N[v])
    return W
def Recommend(user, train, W, K=None):
    """Score unseen items for a user with user-based collaborative filtering.

    The ``K`` users most similar to ``user`` are taken from ``W[user]``; each
    item ``i`` of such a neighbour ``v`` that ``user`` does not have
    accumulates ``W[user][v] * train[v][i]``.

    Args:
        user: Key of the target user in ``train``.
        train: Mapping ``user -> {item: value}``.
        W: User similarity, ``W[u][v]``.
        K: Number of nearest neighbours to use. ``None`` (the default) uses
            every neighbour listed in ``W[user]``. The original body read an
            undefined name ``K``, so it is now an optional parameter.

    Returns:
        Dict ``item -> score`` covering only items ``user`` does not have.

    Raises:
        KeyError: If ``user`` or one of its neighbours is not in ``train``.
    """
    # Imported locally because the file never imports itemgetter.
    from operator import itemgetter

    rank = dict()
    interacted_items = train[user]
    # A user sharing no item with anyone has no row in W.
    neighbours = sorted(W.get(user, {}).items(),
                        key=itemgetter(1), reverse=True)[0:K]
    for v, wuv in neighbours:
        for i, rvi in train[v].items():
            if i in interacted_items:
                continue
            rank[i] = rank.get(i, 0) + wuv * rvi
    return rank
3.基于时间上下文的个性化推荐
import math
def RecentPopularity(records, alpha, T):
    """Compute time-decayed item popularity as of time ``T``.

    Every record strictly before ``T`` adds ``1 / (1 + alpha * (T - tm))``
    to its item's score, so recent interactions weigh more.

    Args:
        records: Iterable of ``(user, item, tm)`` triples.
        alpha: Decay rate applied to the age ``T - tm``.
        T: Reference time; records with ``tm >= T`` are ignored.

    Returns:
        Dict ``item -> decayed popularity``.
    """
    ret = dict()
    for user, item, tm in records:
        if tm >= T:
            continue
        # Accumulate here: the addToDict helper in this file is a no-op
        # stub, so delegating to it left the result permanently empty.
        ret[item] = ret.get(item, 0) + 1 / (1.0 + alpha * (T - tm))
    return ret
def addToDict(dicts, item, value):
    """Add ``value`` to the running total stored under ``item`` in ``dicts``.

    The original body was ``pass``, which silently dropped every value.

    Args:
        dicts: Dict mutated in place; a missing ``item`` starts from 0.
        item: Key to accumulate under.
        value: Amount to add.
    """
    dicts[item] = dicts.get(item, 0) + value
def ItemSimilarity(train, alpha):
    """Compute time-aware ItemCF similarity.

    A co-occurrence of items ``i`` and ``j`` in one user's history
    contributes ``1 / (1 + alpha * |t_ui - t_uj|)``, so items used close
    together in time count as more similar.

    Args:
        train: Mapping ``user -> {item: timestamp}``.
        alpha: Decay rate applied to the time gap between two interactions.

    Returns:
        Nested dict ``W`` where ``W[i][j] = C[i][j] / sqrt(N[i] * N[j])``,
        with ``C`` the decayed co-occurrence sums and ``N[i]`` the number of
        users holding item ``i``. Only ordered pairs of distinct items
        sharing at least one user get an entry.
    """
    C = dict()  # C[i][j]: time-decayed co-occurrence weight
    N = dict()  # N[i]: users that have i
    for u, items in train.items():
        for i, tui in items.items():
            # Plain dicts have no implicit zero, so seed counters explicitly.
            N[i] = N.get(i, 0) + 1
            for j, tuj in items.items():
                if i == j:
                    continue
                row = C.setdefault(i, {})
                # 1.0 keeps the division floating-point on Python 2 as well.
                row[j] = row.get(j, 0) + 1.0 / (1 + alpha * abs(tui - tuj))
    W = dict()
    for i, related_items in C.items():
        W[i] = {}
        for j, cij in related_items.items():
            W[i][j] = cij / math.sqrt(N[i] * N[j])
    return W
def RecommendItemCF(train, user_id, W, K, t0, alpha=1.0):
    """Score unseen items with time-aware item-based collaborative filtering.

    For every item ``i`` in the user's history (interacted at time ``tui``),
    the ``K`` most similar items ``j`` are taken from ``W[i]``; each unseen
    ``j`` accumulates ``W[i][j] / (1 + alpha * |t0 - tui|)``, so recent
    history influences the ranking more.

    The original body could not be parsed (``if j,tuj in ru.items():``) and
    read the undefined names ``alpha`` and ``tuj``.

    NOTE(review): this section's ItemSimilarity treats the values of
    ``train[user]`` as timestamps, so the history value is used here as the
    interaction time rather than as a rating multiplier - confirm against
    the intended data layout.

    Args:
        train: Mapping ``user -> {item: timestamp}``.
        user_id: Key of the target user in ``train``.
        W: Item similarity, ``W[i][j]``.
        K: Number of nearest neighbours considered per item.
        t0: Reference ("current") time.
        alpha: Decay rate for the age of a historic interaction. Defaults to
            1.0, an arbitrary choice made because the original relied on an
            undefined global.

    Returns:
        Dict ``item -> score`` covering only items the user does not have.

    Raises:
        KeyError: If ``user_id`` is not present in ``train``.
    """
    # Imported locally because the file never imports itemgetter.
    from operator import itemgetter

    rank = dict()
    ru = train[user_id]
    for i, tui in ru.items():
        # abs() keeps the denominator >= 1 even when t0 precedes tui.
        decay = 1.0 / (1 + alpha * abs(t0 - tui))
        neighbours = sorted(W.get(i, {}).items(),
                            key=itemgetter(1), reverse=True)[0:K]
        for j, wj in neighbours:
            if j in ru:
                continue
            rank[j] = rank.get(j, 0) + wj * decay
    return rank
def UserSimilarity(train, alpha=1.0):
    """Compute time-aware UserCF similarity.

    An item shared by users ``u`` and ``v`` contributes
    ``1 / (1 + alpha * |t_ui - t_vi|)``, so users who touched the same item
    at nearby times count as more similar.

    Args:
        train: Mapping ``user -> {item: timestamp}``.
        alpha: Decay rate applied to the time gap. Defaults to 1.0, an
            arbitrary choice made because the original body read an
            undefined name ``alpha``.

    Returns:
        Nested dict ``W`` where ``W[u][v] = C[u][v] / sqrt(N[u] * N[v])``,
        with ``C`` the decayed shared-item sums and ``N[u]`` the number of
        items user ``u`` holds. Only ordered pairs of distinct users sharing
        at least one item get an entry.
    """
    # Inverted index item -> {user: timestamp}.
    item_users = dict()
    for u, items in train.items():
        for i, tui in items.items():
            item_users.setdefault(i, {})[u] = tui

    C = dict()  # C[u][v]: time-decayed weight of items shared by u and v
    N = dict()  # N[u]: items held by u
    for i, users in item_users.items():
        for u, tui in users.items():
            # Plain dicts have no implicit zero, so seed counters explicitly.
            N[u] = N.get(u, 0) + 1
            for v, tvi in users.items():
                if u == v:
                    continue
                row = C.setdefault(u, {})
                # 1.0 keeps the division floating-point on Python 2 as well.
                row[v] = row.get(v, 0) + 1.0 / (1 + alpha * abs(tui - tvi))

    W = dict()
    for u, related_users in C.items():
        W[u] = {}
        for v, cuv in related_users.items():
            W[u][v] = cuv / math.sqrt(N[u] * N[v])
    return W
def RecommendUserCF(user, T, train, W, K=None, alpha=1.0):
    """Score unseen items with time-aware user-based collaborative filtering.

    The ``K`` users most similar to ``user`` are taken from ``W[user]``; each
    item ``i`` of such a neighbour ``v`` that ``user`` does not have
    accumulates ``W[user][v] / (1 + alpha * |T - t_vi|)``, so what
    neighbours did recently counts more.

    The original body read the undefined names ``u``, ``K`` and ``alpha``;
    the latter two are now optional parameters.

    Args:
        user: Key of the target user in ``train``.
        T: Reference ("current") time.
        train: Mapping ``user -> {item: timestamp}``.
        W: User similarity, ``W[u][v]``.
        K: Number of nearest neighbours to use. ``None`` (the default) uses
            every neighbour listed in ``W[user]``.
        alpha: Decay rate for the age of a neighbour's interaction. Defaults
            to 1.0, an arbitrary choice.

    Returns:
        Dict ``item -> score`` covering only items ``user`` does not have.

    Raises:
        KeyError: If ``user`` or one of its neighbours is not in ``train``.
    """
    # Imported locally because the file never imports itemgetter.
    from operator import itemgetter

    rank = dict()
    interacted_items = train[user]
    # A user sharing no item with anyone has no row in W.
    neighbours = sorted(W.get(user, {}).items(),
                        key=itemgetter(1), reverse=True)[0:K]
    for v, wuv in neighbours:
        for i, tvi in train[v].items():
            if i in interacted_items:
                continue
            # abs() keeps the denominator >= 1 even when T precedes tvi.
            rank[i] = rank.get(i, 0) + wuv / (1 + alpha * abs(T - tvi))
    return rank
4.基于LFM算法的个性化推荐
import random
'''
items => {'12':'PHP','1203':'Storm','123':'Ubuntu'}
items_pool => [12,32,121,324,532,123,53,1203,429,2932]
user_items => {'1010':[12,1203,123,429]}
'''
def RandomSelectNagativeSample(items, pool=None):
    """Build a labelled sample set: the user's items plus random negatives.

    Every element of ``items`` is labelled 1. Candidates are then drawn at
    random from ``pool`` and labelled 0 unless they are already in the
    sample set. Drawing stops after ``3 * len(items)`` attempts or once more
    than ``len(items)`` negatives have been collected, whichever is first.

    NOTE(review): the ``n > len(items)`` stop condition is kept from the
    original, so up to ``len(items) + 1`` negatives can be returned -
    confirm whether an exact balance was intended.

    Args:
        items: The user's positive items. A dict (its keys are used) or any
            sized iterable such as the lists stored in ``user_items``.
        pool: Sequence of candidate items to draw negatives from. Defaults
            to the module-level ``items_pool``, which the original read
            implicitly.

    Returns:
        Dict ``item -> label`` with 1 for positives and 0 for negatives.
    """
    if pool is None:
        pool = items_pool
    ret = dict()
    # Iterating a dict yields its keys, so this accepts dicts and lists.
    for i in items:
        ret[i] = 1
    if not pool:
        # Nothing to sample from; the original raised on an empty pool.
        return ret
    n = 0
    for _ in range(0, len(items) * 3):
        item = random.choice(pool)
        if item in ret:
            continue
        ret[item] = 0
        n += 1
        if n > len(items):
            break
    return ret
def InitModel(user_items, F):
    """Create the initial latent-factor tables for users and items.

    The original built ``Q`` only from the first user's item list, leaving
    every other user's items without factors. It also indexed
    ``user_items.values()``, which fails on Python 3 and on empty input.
    All users' items are now covered.

    NOTE(review): every factor starts at the same constant 1, as in the
    original. Identical starting values give every factor identical
    gradient updates in LatentFactorModel - consider random initialisation.

    Args:
        user_items: Mapping ``user -> iterable of items``.
        F: Number of latent factors.

    Returns:
        ``[P, Q]`` where ``P[user][f]`` and ``Q[item][f]`` are 1 for every
        ``f`` in ``range(F)``.
    """
    P = dict()
    Q = dict()
    for u, items in user_items.items():
        P[u] = dict((f, 1) for f in range(0, F))
        for item in items:
            if item not in Q:
                Q[item] = dict((f, 1) for f in range(0, F))
    return [P, Q]
def LatentFactorModel(user_items, F, N, alpha, lambda1):
    """Train the latent factor model by stochastic gradient descent.

    Each of the ``N`` passes draws a fresh positive/negative sample set per
    user and, for every sampled ``(user, item, label)``, moves the factors
    along the gradient of the regularised squared error. The learning rate
    is multiplied by 0.9 after each pass.

    The original called an undefined ``Predict(user, item)``; the prediction
    is now computed inline as the dot product of ``P[user]`` and ``Q[item]``.

    Args:
        user_items: Mapping ``user -> items`` passed to ``InitModel`` and,
            per user, to ``RandomSelectNagativeSample``.
        F: Number of latent factors.
        N: Number of training passes.
        alpha: Initial learning rate.
        lambda1: Regularisation weight.

    Returns:
        ``[P, Q]`` with the trained ``P[user][f]`` and ``Q[item][f]``.
    """
    [P, Q] = InitModel(user_items, F)
    for setup in range(0, N):
        for user, items in user_items.items():
            samples = RandomSelectNagativeSample(items)
            for item, rui in samples.items():
                if item not in Q:
                    # Negative samples can be items no user has, which
                    # InitModel never saw; give them the same start value.
                    Q[item] = dict((f, 1) for f in range(0, F))
                pu = P[user]
                qi = Q[item]
                eui = rui - sum(pu[f] * qi[f] for f in range(0, F))
                for f in range(0, F):
                    # As in the original, the item update uses the user
                    # factor that was just updated.
                    pu[f] += alpha * (eui * qi[f] - lambda1 * pu[f])
                    qi[f] += alpha * (eui * pu[f] - lambda1 * qi[f])
        alpha *= 0.9
    return [P, Q]
def Recommend(user, P, Q):
    """Score every item for a user from the trained latent factors.

    The original read an undefined name (``qfi``), incremented missing keys
    under an ``if i not in rank`` guard, and indexed ``Q`` as ``Q[f][i]``
    although ``InitModel`` in this file builds ``Q[item][f]``. This version
    follows the ``Q[item][f]`` layout.

    Args:
        user: Key of the target user in ``P``.
        P: User factors, ``P[user][f]``.
        Q: Item factors, ``Q[item][f]``.

    Returns:
        Dict ``item -> sum over f of P[user][f] * Q[item][f]`` for every
        item in ``Q``. A factor missing from an item's row counts as 0.

    Raises:
        KeyError: If ``user`` is not present in ``P``.
    """
    rank = dict()
    user_factors = P[user]
    for i, item_factors in Q.items():
        rank[i] = sum(puf * item_factors.get(f, 0)
                      for f, puf in user_factors.items())
    return rank
def PersonalRank(G, alpha, root, maxsetup):
    """Run the PersonalRank random walk with restart and print each pass.

    Starting with all probability on ``root``, each pass sends a fraction
    ``alpha`` of every node's rank in equal shares to its neighbours and
    returns the remaining ``1 - alpha`` to ``root``.

    The original added ``1 - alpha`` to the root once per incoming edge
    (inside the edge loop), which inflated the root's score. The restart
    mass is now added exactly once per pass. The Python 2-only print
    statements were replaced by a single ``print(line)`` per pass that emits
    the same text on Python 2 and 3.

    Args:
        G: Adjacency mapping ``node -> {neighbour: weight}``. The weights
            are not used; each neighbour gets an equal share.
        alpha: Probability of continuing the walk.
        root: Node the walk restarts from.
        maxsetup: Number of passes to run.

    Returns:
        Dict ``node -> rank`` after the last pass.
    """
    rank = dict.fromkeys(G.keys(), 0)
    rank[root] = 1
    for k in range(maxsetup):
        tmp = dict.fromkeys(G.keys(), 0)
        for i, ri in G.items():
            for j, wij in ri.items():
                if j not in tmp:
                    tmp[j] = 0
                tmp[j] += alpha * rank[i] / (1.0 * len(ri))
        # Restart mass goes to the root once per pass, not once per edge.
        tmp[root] = tmp.get(root, 0) + (1 - alpha)
        rank = tmp
        line = 'iter:' + str(k) + "\t"
        line += "".join("%s:%.3f,\t" % (key, value)
                        for key, value in rank.items())
        print(line)
    return rank
# Demo: run PersonalRank from user 'A' on a small user/item graph.
# Upper-case keys are user nodes, lower-case keys are item nodes.
# The guard body and the dict literal are re-indented so the block parses.
if __name__ == '__main__':
    G = {
        'A': {'a': 1, 'c': 1},
        'B': {'a': 1, 'b': 1, 'c': 1, 'd': 1},
        'C': {'c': 1, 'd': 1},
        'a': {'A': 1, 'B': 1},
        'b': {'B': 1},
        'c': {'A': 1, 'B': 1, 'C': 1},
        'd': {'B': 1, 'C': 1},
    }
    PersonalRank(G, 0.85, 'A', 20)
'''
#items_pool = {'12':'PHP','32':'Nginx','121':'Apache','324':'Erlang','532':'Linux','123':'Ubuntu','53':'Java','1203':'Storm','429':'Kafka','2932':'Flume'}
items_pool = [12,32,121,324,532,123,53,1203,429,2932]
items = {'12':'PHP','1203':'Storm','123':'Ubuntu'}
user_items = {'1010':[12,1203,123,429]}
#print RandomSelectNagativeSample(items)
print InitModel(user_items,4)
'''
5.基于图的推荐算法
'''
基于图的推荐算法,二分图
'''
def PersonalRank(G, alpha, root, maxsetup):
    """Run the PersonalRank random walk with restart on a bipartite graph.

    Starting with all probability on ``root``, each pass sends a fraction
    ``alpha`` of every node's rank in equal shares to its neighbours and
    returns the remaining ``1 - alpha`` to ``root``. The ranks are printed
    after every pass.

    The original added ``1 - alpha`` to the root once per incoming edge
    (inside the edge loop), which inflated the root's score. The restart
    mass is now added exactly once per pass. The Python 2-only print
    statements were replaced by a single ``print(line)`` per pass that emits
    the same text on Python 2 and 3.

    Args:
        G: Adjacency mapping ``node -> {neighbour: weight}``. The weights
            are not used; each neighbour gets an equal share.
        alpha: Probability of continuing the walk.
        root: Node the walk restarts from.
        maxsetup: Number of passes to run.

    Returns:
        Dict ``node -> rank`` after the last pass.
    """
    rank = dict.fromkeys(G.keys(), 0)
    rank[root] = 1
    for k in range(maxsetup):
        tmp = dict.fromkeys(G.keys(), 0)
        for i, ri in G.items():
            share = alpha * rank[i] / (1.0 * len(ri)) if ri else 0
            for j, wij in ri.items():
                tmp[j] = tmp.get(j, 0) + share
        # Restart mass goes to the root once per pass, not once per edge.
        tmp[root] = tmp.get(root, 0) + (1 - alpha)
        rank = tmp
        parts = ['iter:' + str(k) + "\t"]
        for key, value in rank.items():
            parts.append("%s:%.3f,\t" % (key, value))
        print("".join(parts))
    return rank
# Demo: run PersonalRank from user 'C' on a small bipartite user/item graph.
# Upper-case keys are user nodes, lower-case keys are item nodes.
# The guard body and the dict literal are re-indented so the block parses.
if __name__ == '__main__':
    G = {
        'A': {'a': 1, 'c': 1},
        'B': {'a': 1, 'b': 1, 'c': 1, 'd': 1},
        'C': {'c': 1, 'd': 1},
        'a': {'A': 1, 'B': 1},
        'b': {'B': 1},
        'c': {'A': 1, 'B': 1, 'C': 1},
        'd': {'B': 1, 'C': 1},
    }
    PersonalRank(G, 0.85, 'C', 20)
更多推荐
已为社区贡献5条内容
所有评论(0)