转自:https://blog.csdn.net/u012995888/article/details/79077681

原文用 Java 实现,想看详细介绍的请跳转到原文

链接:https://blog.csdn.net/u012995888/article/details/79077681

数据格式可以自行修改,改用其它方式存储会更方便。

直接上代码(基于用户的协同过滤)

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
'''
@Time    : 2018/9/15 16:08
@Author  : Negen
@Site    : 
@File    : recommend2.py
@Software: PyCharm
'''
import numpy as np
from colorama import Fore

# User names; the position of a name is the row index into allUserMovieStarList.
users = [
    "小明",
    "小花",
    "小美",
    "小张",
    "小李",
]

# Movie titles; the position of a title is the column index into each rating row.
movies = [
    "电影1",
    "电影2",
    "电影3",
    "电影4",
    "电影5",
    "电影6",
    "电影7",
]

# Rating matrix: one row per entry of `users`, one column per entry of `movies`.
# The recommendation code treats a 0 as "this user has not rated the movie".
allUserMovieStarList = [
    [3, 1, 4, 4, 1, 0, 0],  # 小明
    [0, 5, 1, 0, 0, 4, 0],  # 小花
    [1, 0, 5, 4, 3, 5, 2],  # 小美
    [3, 1, 4, 3, 5, 0, 0],  # 小张
    [5, 2, 0, 1, 0, 5, 5],  # 小李
]


def sim_distancec(person1: str, person2: str) -> float:
    """
    Return the Euclidean distance between the rating rows of two users.

    The result is a distance, not a similarity score: 0 means the two rows
    are identical and larger values mean the users rated less alike.

    :param person1: name of the first user; must be present in ``users``
    :param person2: name of the second user; must be present in ``users``
    :return: the distance as a NumPy floating-point scalar
    :raises ValueError: if either name is not found in ``users``
    """
    # Each user's ratings live in the row at the same position as their name.
    ratings1 = np.array(allUserMovieStarList[users.index(person1)])
    ratings2 = np.array(allUserMovieStarList[users.index(person2)])
    # Square root of the summed squared per-movie differences.
    return np.sqrt(((ratings1 - ratings2) ** 2).sum())


def cal_all_user_distance(person: str, order: int = 1) -> list:
    """
    Compute the distance from ``person`` to every other user and return the
    results as a sorted list of ``(distance, user_index)`` tuples.

    :param person: name of the user to compare against the others
    :param order: 1 sorts ascending (the default); any other value sorts
                  descending
    :return: list of ``(distance, index into users)`` tuples, one per user
             whose name differs from ``person``
    """
    descending = order != 1
    pairs = []
    for other in users:
        if other != person:
            pairs.append((sim_distancec(other, person), users.index(other)))
    # Tuples compare by distance first, then by user index.
    return sorted(pairs, reverse=descending)


def cal_movie_recommend(person: str) -> list:
    """
    Recommend movies that ``person`` has not rated yet.

    Takes the (at most) two users closest to ``person``, scores every movie
    as the sum of each neighbour's rating multiplied by that neighbour's
    distance, and keeps the movies whose score is strictly above the average
    score and whose own rating for ``person`` is 0.

    :param person: name of the user to recommend for; must be in ``users``
    :return: alphabetically sorted list of recommended movie titles; empty
             when there are no movies or no other users
    :raises ValueError: if ``person`` is not found in ``users``
    """
    # Ascending distance, so the first entries are the closest users. The
    # slice copes with fewer than two other users instead of raising.
    neighbours = cal_all_user_distance(person)[:2]
    if not movies:
        # Nothing to score; also avoids dividing by zero for the average.
        return []
    own_ratings = allUserMovieStarList[users.index(person)]

    # NOTE(review): each neighbour's rating is weighted by its raw distance,
    # so the farther of the two neighbours gets the larger weight and a
    # neighbour at distance 0 contributes nothing. Kept as in the original
    # algorithm - confirm whether a similarity weight was intended.
    scores = [
        sum(allUserMovieStarList[row][col] * dist for dist, row in neighbours)
        for col in range(len(movies))
    ]
    threshold = sum(scores) / len(movies)

    recommended = [
        movies[col]
        for col, score in enumerate(scores)
        if score > threshold and own_ratings[col] == 0
    ]
    recommended.sort()
    return recommended

# print(cal_all_user_distance('小张'))

def main(username: str):
    """
    Print ``username`` in green, then print that user's recommended movies.

    :param username: name of the user to show recommendations for
    :return: None
    """
    # The name goes out first, before the recommendation is computed.
    heading = f"{Fore.GREEN!s} {username!s} {Fore.RESET!s}"
    print(heading)
    label = '推荐电影列表:'
    recommendations = cal_movie_recommend(username)
    print(label, recommendations)


if __name__ == '__main__':
    # Demo run: print recommendations for every user, in this fixed order.
    for demo_user in ("小明", "小张", "小花", "小李", "小美"):
        main(demo_user)

运行结果:

Logo

CSDN联合极客时间,共同打造面向开发者的精品内容学习社区,助力成长!

更多推荐