import recsys.algorithm
recsys.algorithm.VERBOSE = True
from recsys.algorithm.factorize import SVD
from recsys.datamodel.data import Data
from recsys.evaluation.prediction import RMSE
import os,sys
# Saved-model archive: RecommendSystem.get_data() loads it when it exists, and
# RecommendSystem.train() passes this path minus its last four characters
# (the ".zip" suffix) as the savefile argument of SVD.compute().
tmpfile = "/tmp/movielens.zip"
# Movie catalogue read line by line by RecommendSystem.recommend_to_user();
# each line is split on "::" and the second field is used as the movie title.
moviefile = "./ml-1m/movies.dat"
class RecommendSystem(object):
    """
    SVD-based recommender built on the ``recsys`` package.

    Intended call order: get_data() -> train() -> evaluation() ->
    recommend_to_user(). When a saved model already exists at ``tmpfile`` it
    is loaded instead, and training and evaluation are skipped.
    """

    def __init__(self, filename, sep, **fmt):
        """
        Remember where the ratings live and create the recsys helper objects.

        :param filename: path of the ratings file
        :param sep: field separator used in the ratings file
        :param fmt: column-layout keywords, forwarded unchanged as the
            ``format`` argument of ``Data.load()``
        """
        self.filename = filename
        self.sep = sep
        self.format = fmt
        # Hyper-parameters handed to SVD.compute() by train().
        self.k = 100
        self.min_values = 10
        self.post_normalize = True
        self.svd = SVD()
        # Set to True by get_data() once a saved model has been loaded.
        self.is_load = False
        self.data = Data()
        self.rmse = RMSE()

    def get_data(self):
        """
        Load the saved model if one exists, otherwise load and split the ratings.

        :return: ``(None, None)`` when a saved model was loaded from
            ``tmpfile``; otherwise the ``(train, test)`` pair produced by
            ``Data.split_train_test(percent=80)``
        :raises SystemExit: when neither the saved model nor the ratings
            file exists
        """
        if os.path.exists(tmpfile):
            self.svd.load_model(tmpfile)
            self.is_load = True
            return None, None

        if not os.path.exists(self.filename):
            # Exit with a message (and a non-zero status) instead of silently.
            sys.exit("ratings file not found: %s" % self.filename)

        self.data.load(self.filename, sep=self.sep, format=self.format)
        train, test = self.data.split_train_test(percent=80)
        return train, test

    def train(self, train):
        """
        Fit the SVD model on the training data and save it.

        Does nothing when a saved model was already loaded by get_data().

        :param train: training data set
        :return: None
        """
        if self.is_load:
            return None

        self.svd.set_data(train)
        # The save path is passed without its ".zip" extension; presumably
        # recsys appends the extension itself -- confirm against its docs.
        self.svd.compute(
            k=self.k,
            min_values=self.min_values,
            post_normalize=self.post_normalize,
            savefile=os.path.splitext(tmpfile)[0],
        )
        return None

    def rs_predict(self, itemid, userid):
        """
        Predict a rating, print it, and return it.

        :param itemid: movie id
        :param userid: user id
        :return: the value returned by ``SVD.predict(itemid, userid)``
        """
        score = self.svd.predict(itemid, userid)
        print("推荐的分数为:%f" % score)
        return score

    @staticmethod
    def _load_movie_titles(path):
        """
        Parse a ``id::title::...`` catalogue into a ``{int id: title}`` dict.

        Lines with fewer than two fields or a non-integer id are skipped.
        """
        titles = {}
        with open(path, "r") as handle:
            for line in handle:
                fields = line.rstrip("\r\n").split("::")
                if len(fields) < 2:
                    continue
                try:
                    titles[int(fields[0])] = fields[1]
                except ValueError:
                    continue
        return titles

    def recommend_to_user(self, userid, movie_path=None):
        """
        Print the movies recommended for a user with their predicted scores.

        :param userid: user id
        :param movie_path: catalogue file to read titles from; defaults to
            the module-level ``moviefile``
        :return: None
        """
        if movie_path is None:
            movie_path = moviefile

        recommend_list = self.svd.recommend(userid, is_row=False)
        # Look titles up by movie id, not by line position: ids are 1-based
        # and need not be contiguous, so positional indexing picks the wrong
        # movie.
        titles = self._load_movie_titles(movie_path)
        for itemid, rate in recommend_list:
            # Fall back to the raw id when the catalogue has no such movie.
            title = titles.get(itemid, itemid)
            print("给您推荐了%s,我们预测分数为%s" % (title, rate))
        return None

    def evaluation(self, test):
        """
        Compute and print the RMSE of the model over the test set.

        Skipped entirely when a saved model was loaded, because get_data()
        returns no test set in that case.

        :param test: test data set; iterated via ``test.get()`` as
            ``(value, itemid, userid)`` tuples
        :return: None
        """
        if self.is_load:
            return None

        for value, itemid, userid in test.get():
            try:
                predict = self.rs_predict(itemid, userid)
            except KeyError:
                # Presumably raised for ids unseen in training; skip them.
                continue
            else:
                self.rmse.add(value, predict)

        error = self.rmse.compute()
        print("模型误差为%s:" % error)
        return None
if __name__ == "__main__":
    # Column layout forwarded to Data.load() as its format argument:
    # field 1 is used as the row, field 0 as the column, field 2 as the
    # value, and ids are converted with int.
    layout = dict(row=1, col=0, value=2, ids=int)
    recommender = RecommendSystem("./ml-1m/ratings.dat", "::", **layout)

    # Both sets are None when a previously saved model was loaded instead.
    train_set, test_set = recommender.get_data()
    recommender.train(train_set)
    recommender.evaluation(test_set)

    # Print the recommendations for user id 1.
    recommender.recommend_to_user(1)