Code
package com.ruoyi.system.service.impl;
import com.ruoyi.system.domain.Book;
import com.ruoyi.system.domain.MyOrder;
import com.ruoyi.system.mapper.BookMapper;
import com.ruoyi.system.mapper.MyOrderMapper;
import com.ruoyi.system.service.IBookRecommendService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.redis.core.RedisTemplate;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import javax.annotation.PostConstruct;
import java.util.*;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
@Service
public class BookRecommendServiceImpl implements IBookRecommendService {

    private static final Logger log = LoggerFactory.getLogger(BookRecommendServiceImpl.class);

    @Autowired
    private MyOrderMapper orderMapper;

    @Autowired
    private BookMapper bookMapper;

    @Autowired
    private RedisTemplate<String, Object> redisTemplate;

    private static final String USER_SIMILARITY_KEY = "recommend:user:similarity";
    private static final double SIMILARITY_THRESHOLD = 0.000001; // similarity threshold: pairs below this value are ignored

    /**
     * Initialize recommendation data when the application starts.
     */
    @PostConstruct
    public void init() {
        log.info("Checking recommendation data initialization status...");
        try {
            if (!hasRecommendationData()) {
                log.info("No recommendation data found, starting initial computation...");
                preComputeUserSimilarities();
            } else {
                log.info("Recommendation data already exists, skipping initialization");
            }
        } catch (Exception e) {
            log.error("Failed to initialize recommendation data", e);
        }
    }

    /**
     * Check whether similarity data already exists in Redis.
     */
    private boolean hasRecommendationData() {
        Set<String> keys = redisTemplate.keys(USER_SIMILARITY_KEY + ":*");
        return keys != null && !keys.isEmpty();
    }
    @Override
    @Transactional(readOnly = true)
    public List<Book> recommendBooksByUserCF(Long userId, int limit) {
        if (userId == null || limit <= 0) {
            return Collections.emptyList();
        }
        try {
            // 1. Load the user's similarity scores from Redis
            Map<Object, Object> similarityScoresObj = redisTemplate.opsForHash()
                    .entries(USER_SIMILARITY_KEY + ":" + userId);
            if (similarityScoresObj == null || similarityScoresObj.isEmpty()) {
                log.debug("No similar-user data for user {}", userId);
                return Collections.emptyList();
            }
            // 2. Convert the raw hash entries to typed values
            Map<Long, Double> similarityScores = convertSimilarityMap(similarityScoresObj);
            // 3. Pick the top-N most similar users
            List<Long> similarUserIds = getTopSimilarUsers(similarityScores, 10);
            if (similarUserIds.isEmpty()) {
                return Collections.emptyList();
            }
            // 4. Generate book recommendations
            return generateRecommendations(userId, similarUserIds, limit);
        } catch (Exception e) {
            log.error("Error while generating recommendations for user {}", userId, e);
            return Collections.emptyList();
        }
    }
    /**
     * Convert the raw Redis hash entries into a typed similarity map.
     */
    private Map<Long, Double> convertSimilarityMap(Map<Object, Object> rawMap) {
        return rawMap.entrySet().stream()
                .collect(Collectors.toMap(
                        e -> Long.parseLong(e.getKey().toString()),
                        e -> Double.parseDouble(e.getValue().toString())
                ));
    }

    /**
     * Return the IDs of the most similar users, ordered by descending similarity.
     */
    private List<Long> getTopSimilarUsers(Map<Long, Double> similarityScores, int topN) {
        return similarityScores.entrySet().stream()
                .filter(e -> e.getValue() >= SIMILARITY_THRESHOLD)
                .sorted(Map.Entry.<Long, Double>comparingByValue().reversed())
                .limit(topN)
                .map(Map.Entry::getKey)
                .collect(Collectors.toList());
    }

    /**
     * Build the recommended book list for the target user.
     */
    private List<Book> generateRecommendations(Long targetUserId, List<Long> similarUserIds, int limit) {
        // 1. Fetch the completed orders of the similar users
        List<MyOrder> similarUserOrders = orderMapper.selectCompletedOrdersByUserIds(similarUserIds);
        // 2. Collect the books the target user has already purchased
        Set<Long> purchasedBooks = getPurchasedBooks(targetUserId);
        // 3. Score candidate books
        Map<Long, Double> bookScores = calculateBookScores(similarUserOrders, purchasedBooks);
        // 4. Return the top-scoring books
        return getTopRecommendedBooks(bookScores, limit);
    }
    /**
     * Collect the IDs of the books the user has already purchased.
     */
    private Set<Long> getPurchasedBooks(Long userId) {
        List<MyOrder> orders = orderMapper.selectCompletedOrdersByUserId(userId);
        if (orders == null || orders.isEmpty()) {
            return Collections.emptySet();
        }
        return orders.stream()
                .map(MyOrder::getBookId)
                .collect(Collectors.toSet());
    }

    /**
     * Score candidate books by summing the purchase quantities of similar users,
     * skipping books the target user already owns.
     */
    private Map<Long, Double> calculateBookScores(List<MyOrder> similarUserOrders, Set<Long> purchasedBooks) {
        Map<Long, Double> bookScores = new HashMap<>();
        for (MyOrder order : similarUserOrders) {
            Long bookId = order.getBookId();
            if (!purchasedBooks.contains(bookId)) {
                bookScores.merge(bookId, (double) order.getQuantity(), Double::sum);
            }
        }
        return bookScores;
    }

    /**
     * Return the highest-scoring recommended books.
     */
    private List<Book> getTopRecommendedBooks(Map<Long, Double> bookScores, int limit) {
        if (bookScores.isEmpty()) {
            return Collections.emptyList();
        }
        List<Long> recommendedBookIds = bookScores.entrySet().stream()
                .sorted(Map.Entry.<Long, Double>comparingByValue().reversed())
                .limit(limit)
                .map(Map.Entry::getKey)
                .collect(Collectors.toList());
        return bookMapper.selectBookByIds(recommendedBookIds);
    }
    @Override
    @Transactional
    public void preComputeUserSimilarities() {
        log.info("Starting user similarity matrix computation...");
        long startTime = System.currentTimeMillis();
        try {
            // 1. Clear stale data
            clearExistingSimilarityData();
            // 2. Fetch all user IDs that have completed orders
            List<Long> userIds = orderMapper.selectAllUserIdsWithCompletedOrders();
            log.info("Found {} users with completed orders", userIds.size());
            if (userIds.isEmpty()) {
                log.warn("No user order data found!");
                return;
            }
            // 3. Build the user-book rating matrix
            Map<Long, Map<Long, Integer>> ratingMatrix = buildRatingMatrix(userIds);
            // 4. Compute and store pairwise similarities
            computeAndStoreSimilarities(userIds, ratingMatrix);
            long duration = (System.currentTimeMillis() - startTime) / 1000;
            log.info("User similarity matrix computed in {} seconds", duration);
        } catch (Exception e) {
            log.error("Failed to compute user similarity matrix", e);
            throw e;
        }
    }

    /**
     * Remove any previously stored similarity data.
     */
    private void clearExistingSimilarityData() {
        Set<String> keys = redisTemplate.keys(USER_SIMILARITY_KEY + ":*");
        if (keys != null && !keys.isEmpty()) {
            redisTemplate.delete(keys);
            log.info("Cleared {} stale user similarity records", keys.size());
        }
    }

    /**
     * Build the user-book rating matrix, using purchase quantity as the implicit rating.
     */
    private Map<Long, Map<Long, Integer>> buildRatingMatrix(List<Long> userIds) {
        Map<Long, Map<Long, Integer>> ratingMatrix = new HashMap<>();
        for (Long userId : userIds) {
            List<MyOrder> orders = orderMapper.selectCompletedOrdersByUserId(userId);
            if (orders == null || orders.isEmpty()) {
                continue;
            }
            Map<Long, Integer> userRatings = new HashMap<>();
            for (MyOrder order : orders) {
                if (order == null || order.getBookId() == null) {
                    continue;
                }
                Long bookId = order.getBookId();
                Integer quantity = Math.toIntExact(order.getQuantity() != null ? order.getQuantity() : 0);
                userRatings.merge(bookId, quantity, Integer::sum);
            }
            ratingMatrix.put(userId, userRatings);
        }
        return ratingMatrix;
    }
    /**
     * Compute pairwise user similarities and store them in Redis.
     */
    private void computeAndStoreSimilarities(List<Long> userIds, Map<Long, Map<Long, Integer>> ratingMatrix) {
        int computedPairs = 0;
        for (int i = 0; i < userIds.size(); i++) {
            Long userId1 = userIds.get(i);
            Map<Long, Integer> ratings1 = ratingMatrix.get(userId1);
            Map<String, String> similarities = new HashMap<>();
            // Only iterate over later users so each pair is computed once
            for (int j = i + 1; j < userIds.size(); j++) {
                Long userId2 = userIds.get(j);
                Map<Long, Integer> ratings2 = ratingMatrix.get(userId2);
                double similarity = computeCosineSimilarity(ratings1, ratings2);
                if (similarity >= SIMILARITY_THRESHOLD) {
                    similarities.put(userId2.toString(), String.valueOf(similarity));
                    // Similarity is symmetric, so also store the reverse direction;
                    // otherwise users late in the list would never find similar users.
                    String reverseKey = USER_SIMILARITY_KEY + ":" + userId2;
                    redisTemplate.opsForHash().put(reverseKey, userId1.toString(), String.valueOf(similarity));
                    redisTemplate.expire(reverseKey, 7, TimeUnit.DAYS);
                    computedPairs++;
                }
            }
            if (!similarities.isEmpty()) {
                String key = USER_SIMILARITY_KEY + ":" + userId1;
                redisTemplate.opsForHash().putAll(key, similarities);
                redisTemplate.expire(key, 7, TimeUnit.DAYS);
            }
            // Log progress periodically
            if (i % 100 == 0 || i == userIds.size() - 1) {
                log.info("Processed {}/{} users", i + 1, userIds.size());
            }
        }
        log.info("Computed {} user similarity pairs", computedPairs);
    }

    /**
     * Compute the cosine similarity between two users' rating vectors.
     */
    private double computeCosineSimilarity(Map<Long, Integer> ratings1, Map<Long, Integer> ratings2) {
        // Books rated by both users
        Set<Long> commonBooks = new HashSet<>(ratings1.keySet());
        commonBooks.retainAll(ratings2.keySet());
        if (commonBooks.isEmpty()) {
            return 0.0;
        }
        double dotProduct = 0.0;
        double norm1 = 0.0;
        double norm2 = 0.0;
        // Contribution of the books both users rated
        for (Long bookId : commonBooks) {
            int r1 = ratings1.get(bookId);
            int r2 = ratings2.get(bookId);
            dotProduct += r1 * r2;
            norm1 += r1 * r1;
            norm2 += r2 * r2;
        }
        // Books rated by only one user still count towards that user's norm
        norm1 += ratings1.keySet().stream()
                .filter(bookId -> !commonBooks.contains(bookId))
                .mapToDouble(bookId -> Math.pow(ratings1.get(bookId), 2))
                .sum();
        norm2 += ratings2.keySet().stream()
                .filter(bookId -> !commonBooks.contains(bookId))
                .mapToDouble(bookId -> Math.pow(ratings2.get(bookId), 2))
                .sum();
        if (norm1 == 0 || norm2 == 0) {
            return 0.0;
        }
        return dotProduct / (Math.sqrt(norm1) * Math.sqrt(norm2));
    }
    /**
     * Scheduled task: refresh the similarity matrix every day at 3 a.m.
     */
    @Scheduled(cron = "0 0 3 * * ?")
    public void scheduledSimilarityUpdate() {
        log.info("Scheduled task: updating user similarity matrix...");
        try {
            preComputeUserSimilarities();
            log.info("Scheduled task: user similarity matrix updated");
        } catch (Exception e) {
            log.error("Scheduled task: failed to update user similarity matrix", e);
        }
    }
}
Collaborative Filtering Explained
Principles and Implementation of the Collaborative Filtering Recommendation Algorithm
The code above implements a user-based collaborative filtering (User-based CF) recommendation system. The following sections walk through the principles of collaborative filtering and how this code realizes them.
1. Basic Concepts of Collaborative Filtering
Collaborative filtering (CF) is one of the classic recommendation algorithms. Its core idea is:
"Items liked by users similar to you are items you are likely to like as well."
1.1 The two main variants
- User-based collaborative filtering (User-CF): the variant implemented in this code
- Item-based collaborative filtering (Item-CF)
2. The User-CF Flow Implemented in the Code
2.1 Overall flow
Offline computation phase:
- Build the user-item rating matrix
- Compute and store user-to-user similarities
Online recommendation phase:
- Look up the most similar users
- Generate recommendations from those users' preferences (the service interface behind these two phases is sketched below)
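Both phases are exposed through the IBookRecommendService interface that the class implements. The interface itself is not shown in the source; a minimal sketch, assuming it declares only the two methods the implementation overrides, would be:
package com.ruoyi.system.service;

import java.util.List;
import com.ruoyi.system.domain.Book;

// Minimal sketch of the service interface (assumed; only the two overridden methods are included).
public interface IBookRecommendService {

    /** Online phase: return up to `limit` recommended books for the given user. */
    List<Book> recommendBooksByUserCF(Long userId, int limit);

    /** Offline phase: (re)compute the user similarity matrix and store it in Redis. */
    void preComputeUserSimilarities();
}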
2.2 Core code walkthrough
(1) Offline computation phase (preComputeUserSimilarities)
public void preComputeUserSimilarities() {
    // 1. Clear stale data
    clearExistingSimilarityData();
    // 2. Fetch all user IDs with completed orders
    List<Long> userIds = orderMapper.selectAllUserIdsWithCompletedOrders();
    // 3. Build the rating matrix
    Map<Long, Map<Long, Integer>> ratingMatrix = buildRatingMatrix(userIds);
    // 4. Compute and store similarities
    computeAndStoreSimilarities(userIds, ratingMatrix);
}
Building the rating matrix (toy example below):
- Rows are users, columns are books
- The value is the purchase quantity, used as an implicit rating
Similarity computation:
- Uses cosine similarity
- Only pairs whose similarity is at or above the threshold (SIMILARITY_THRESHOLD) are stored
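For intuition, a toy rating matrix built by buildRatingMatrix might look like this (all user and book IDs and quantities are invented for illustration):
// Hypothetical data: user 1 bought book 101 twice and book 102 once,
// user 2 bought book 101 once and book 103 three times, user 3 bought book 102 twice.
Map<Long, Map<Long, Integer>> ratingMatrix = new HashMap<>();
Map<Long, Integer> user1 = new HashMap<>();
user1.put(101L, 2);
user1.put(102L, 1);
Map<Long, Integer> user2 = new HashMap<>();
user2.put(101L, 1);
user2.put(103L, 3);
Map<Long, Integer> user3 = new HashMap<>();
user3.put(102L, 2);
ratingMatrix.put(1L, user1);
ratingMatrix.put(2L, user2);
ratingMatrix.put(3L, user3);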
(2) Similarity computation (computeCosineSimilarity)
private double computeCosineSimilarity(Map<Long, Integer> ratings1, Map<Long, Integer> ratings2) {
    // Books rated by both users
    Set<Long> commonBooks = new HashSet<>(ratings1.keySet());
    commonBooks.retainAll(ratings2.keySet());
    // Accumulate the dot product and the two norms
    double dotProduct = 0.0;
    double norm1 = 0.0;
    double norm2 = 0.0;
    // ... (loop over commonBooks for the dot product, and over each user's remaining
    //      books for the norms; see the full implementation above)
    return dotProduct / (Math.sqrt(norm1) * Math.sqrt(norm2));
}
Cosine similarity formula:
similarity = (A·B) / (||A|| * ||B||)
Here A and B are the two users' full rating vectors: only books purchased by both users contribute to the dot product A·B, while every book a user purchased contributes to that user's norm, which is exactly what the implementation above does.
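A quick worked example using the toy data above (made-up numbers): user 1's ratings are {101: 2, 102: 1} and user 2's are {101: 1, 103: 3}. Only book 101 is shared, so:
A·B   = 2 * 1 = 2
||A|| = sqrt(2*2 + 1*1) = sqrt(5)
||B|| = sqrt(1*1 + 3*3) = sqrt(10)
similarity = 2 / (sqrt(5) * sqrt(10)) ≈ 0.283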
(3) Online recommendation phase (recommendBooksByUserCF)
public List<Book> recommendBooksByUserCF(Long userId, int limit) {
    // 1. Load the user's similarity scores from Redis
    Map<Long, Double> similarityScores = convertSimilarityMap(
            redisTemplate.opsForHash().entries(USER_SIMILARITY_KEY + ":" + userId));
    // 2. Pick the 10 most similar users
    List<Long> similarUserIds = getTopSimilarUsers(similarityScores, 10);
    // 3. Generate recommendations from their purchases
    return generateRecommendations(userId, similarUserIds, limit);
}
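As a usage illustration only, a REST controller could expose this method. The controller below is a hypothetical sketch and is not part of the original source; the request mapping and parameters are assumptions:
import java.util.List;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.web.bind.annotation.*;
import com.ruoyi.system.domain.Book;
import com.ruoyi.system.service.IBookRecommendService;

// Hypothetical controller sketch.
@RestController
@RequestMapping("/system/recommend")
public class BookRecommendController {

    @Autowired
    private IBookRecommendService bookRecommendService;

    // Example: GET /system/recommend/list?userId=1&limit=10
    @GetMapping("/list")
    public List<Book> recommend(@RequestParam Long userId,
                                @RequestParam(defaultValue = "10") int limit) {
        return bookRecommendService.recommendBooksByUserCF(userId, limit);
    }
}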
3. Key Technical Points
3.1 Data storage optimization
- Redis storage: the user similarity matrix is stored in Redis, one hash per user, with keys of the form recommend:user:similarity:{userId} (see the read-back example after this list)
- Periodic refresh: a @Scheduled job rebuilds the matrix every day at 3 a.m.
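Each hash maps a similar user's ID (the hash field) to the similarity score stored as a string (the hash value). Reading one back looks roughly like this; the user ID 1001 and the scores in the comment are made-up values:
// Hypothetical contents of "recommend:user:similarity:1001":
//   "1002" -> "0.4217", "1007" -> "0.1533", ...
Map<Object, Object> entries =
        redisTemplate.opsForHash().entries(USER_SIMILARITY_KEY + ":" + 1001L);
entries.forEach((otherUserId, score) -> log.info("{} -> {}", otherUserId, score));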
3.2 Performance optimization
- Only the upper triangle of the similarity matrix is computed: since similarity(A, B) equals similarity(B, A), each pair is computed only once
- Threshold filtering: only similarities at or above SIMILARITY_THRESHOLD are kept
- Progress logging: progress is reported every 100 users
3.3 Recommendation generation logic
- Books the target user has already purchased are excluded
- Candidate books are scored by summing the purchase quantities of the selected similar users (the current implementation does not additionally weight by similarity score); a worked example follows this list
- The top-N books by score are returned
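A small worked example of the scoring (made-up numbers): if the selected similar users bought book 205 with quantities 2 and 1, and book 301 with quantity 1, and the target user owns neither, calculateBookScores produces {205: 3.0, 301: 1.0}, so book 205 is recommended first. The merge logic reduces to:
// Hypothetical walk-through of the scoring merge in calculateBookScores.
Map<Long, Double> bookScores = new HashMap<>();
bookScores.merge(205L, 2.0, Double::sum); // similar user A bought book 205 twice
bookScores.merge(205L, 1.0, Double::sum); // similar user B bought it once -> total 3.0
bookScores.merge(301L, 1.0, Double::sum); // similar user B bought book 301 once -> 1.0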
4. Strengths and Weaknesses of Collaborative Filtering
Strengths:
- No item content needed: it relies only on user behavior data
- Can surface latent interests: it may recommend items the user would not have found otherwise
- Explainable: recommendations can be justified as "users similar to you also liked this"
Weaknesses:
- Cold start: new users and new items lack interaction data
- Sparsity: the user-item matrix is usually very sparse
- Computational cost: pairwise similarity computation grows quadratically with the number of users
5. Engineering Practices in the Code
- Scheduled job: @Scheduled refreshes the similarity matrix periodically (see the note after this list)
- Startup initialization: @PostConstruct checks on application start whether the data needs to be computed
- Error handling: try-catch blocks and logging around every phase
- Performance monitoring: the duration of the offline computation is logged
- Resource cleanup: stale data is cleared before recomputation
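One reminder on the scheduled job mentioned above: Spring only runs @Scheduled methods when scheduling is enabled, typically via @EnableScheduling on a configuration class. RuoYi-based projects normally enable this already; the snippet below is only needed if yours does not:
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.annotation.EnableScheduling;

// Only needed if scheduling is not already enabled elsewhere in the project.
@Configuration
@EnableScheduling
public class SchedulingConfig {
}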
This is a typical production-oriented collaborative filtering implementation that balances algorithmic effectiveness with engineering practice, and it is well suited to a mid-sized e-commerce platform. For larger systems, further optimizations such as distributed computation or approximate similarity algorithms would likely be needed.