From ccbf5c9520b77da26bd821e02059ab9f4a89ad9b Mon Sep 17 00:00:00 2001 From: RobustH <1511209518@qq.com> Date: Tue, 14 Apr 2026 23:18:29 +0800 Subject: [PATCH] =?UTF-8?q?feat(rag):=20=E7=9F=A5=E8=AF=86=E5=BA=93?= =?UTF-8?q?=E6=A3=80=E7=B4=A2=E6=B5=8B=E8=AF=95=E6=96=B0=E5=A2=9E=E6=B7=B7?= =?UTF-8?q?=E5=90=88=E6=A3=80=E7=B4=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../bo/knowledge/KnowledgeFragmentBo.java | 10 ++ .../domain/bo/knowledge/KnowledgeInfoBo.java | 10 ++ .../entity/knowledge/KnowledgeFragment.java | 5 + .../entity/knowledge/KnowledgeInfo.java | 10 ++ .../vo/knowledge/KnowledgeFragmentVo.java | 7 +- .../domain/vo/knowledge/KnowledgeInfoVo.java | 13 +++ .../vo/knowledge/KnowledgeRetrievalVo.java | 24 +++++ .../knowledge/KnowledgeFragmentMapper.java | 9 ++ .../impl/KnowledgeAttachServiceImpl.java | 1 + .../impl/KnowledgeFragmentServiceImpl.java | 92 ++++++++++++++++++- 10 files changed, 178 insertions(+), 3 deletions(-) diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/domain/bo/knowledge/KnowledgeFragmentBo.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/domain/bo/knowledge/KnowledgeFragmentBo.java index 1508462f..895ab69f 100644 --- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/domain/bo/knowledge/KnowledgeFragmentBo.java +++ b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/domain/bo/knowledge/KnowledgeFragmentBo.java @@ -79,4 +79,14 @@ public class KnowledgeFragmentBo extends BaseEntity { */ private String rerankModel; + /** + * 是否启用混合检索 + */ + private Boolean enableHybrid; + + /** + * 混合检索权重 (0.0-1.0) + */ + private Double hybridAlpha; + } diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/domain/bo/knowledge/KnowledgeInfoBo.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/domain/bo/knowledge/KnowledgeInfoBo.java index 8629018a..5f7a143e 100644 --- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/domain/bo/knowledge/KnowledgeInfoBo.java +++ b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/domain/bo/knowledge/KnowledgeInfoBo.java @@ -92,5 +92,15 @@ public class KnowledgeInfoBo extends BaseEntity { */ private String remark; + /** + * 是否启用混合检索(0 否 1 是) + */ + private Integer enableHybrid; + + /** + * 混合检索权重比例 (0.0-1.0) + */ + private Double hybridAlpha; + } diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/domain/entity/knowledge/KnowledgeFragment.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/domain/entity/knowledge/KnowledgeFragment.java index 04716e15..d184a10a 100644 --- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/domain/entity/knowledge/KnowledgeFragment.java +++ b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/domain/entity/knowledge/KnowledgeFragment.java @@ -47,5 +47,10 @@ public class KnowledgeFragment extends BaseEntity { */ private String remark; + /** + * 知识库ID + */ + private Long knowledgeId; + } diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/domain/entity/knowledge/KnowledgeInfo.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/domain/entity/knowledge/KnowledgeInfo.java index a5211e69..e2e6da4b 100644 --- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/domain/entity/knowledge/KnowledgeInfo.java +++ b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/domain/entity/knowledge/KnowledgeInfo.java @@ -93,5 +93,15 @@ public class KnowledgeInfo extends BaseEntity { */ private String remark; + /** + * 是否启用混合检索(0 否 1 是) + */ + private Integer enableHybrid; + + /** + * 混合检索权重比例 (0.0-1.0) + */ + private Double hybridAlpha; + } diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/domain/vo/knowledge/KnowledgeFragmentVo.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/domain/vo/knowledge/KnowledgeFragmentVo.java index b8be695e..45b4a9ab 100644 --- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/domain/vo/knowledge/KnowledgeFragmentVo.java +++ b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/domain/vo/knowledge/KnowledgeFragmentVo.java @@ -39,7 +39,7 @@ public class KnowledgeFragmentVo implements Serializable { * 片段索引下标 */ @ExcelProperty(value = "片段索引下标") - private Long idx; + private Integer idx; /** * 文档内容 @@ -53,5 +53,10 @@ public class KnowledgeFragmentVo implements Serializable { @ExcelProperty(value = "备注") private String remark; + /** + * 知识库ID + */ + private Long knowledgeId; + } diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/domain/vo/knowledge/KnowledgeInfoVo.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/domain/vo/knowledge/KnowledgeInfoVo.java index e65444e7..6f9a148d 100644 --- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/domain/vo/knowledge/KnowledgeInfoVo.java +++ b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/domain/vo/knowledge/KnowledgeInfoVo.java @@ -113,6 +113,19 @@ public class KnowledgeInfoVo implements Serializable { @ExcelProperty(value = "备注") private String remark; + /** + * 是否启用混合检索(0 否 1 是) + */ + @ExcelProperty(value = "是否启用混合检索", converter = ExcelDictConvert.class) + @ExcelDictFormat(readConverterExp = "0=否,1=是") + private Integer enableHybrid; + + /** + * 混合检索权重比例 (0.0-1.0) + */ + @ExcelProperty(value = "混合检索权重比例") + private Double hybridAlpha; + /** * 文档数(统计字段,非数据库列) */ diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/domain/vo/knowledge/KnowledgeRetrievalVo.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/domain/vo/knowledge/KnowledgeRetrievalVo.java index 95c8e4cf..420015d8 100644 --- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/domain/vo/knowledge/KnowledgeRetrievalVo.java +++ b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/domain/vo/knowledge/KnowledgeRetrievalVo.java @@ -1,7 +1,9 @@ package org.ruoyi.domain.vo.knowledge; +import lombok.AllArgsConstructor; import lombok.Builder; import lombok.Data; +import lombok.NoArgsConstructor; import java.io.Serial; import java.io.Serializable; @@ -13,11 +15,33 @@ import java.io.Serializable; */ @Data @Builder +@NoArgsConstructor +@AllArgsConstructor public class KnowledgeRetrievalVo implements Serializable { @Serial private static final long serialVersionUID = 1L; + /** + * 片段ID + */ + private String id; + + /** + * 文档ID + */ + private String docId; + + /** + * 知识库ID + */ + private Long knowledgeId; + + /** + * 分片索引 + */ + private Integer idx; + /** * 片段内容 */ diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/mapper/knowledge/KnowledgeFragmentMapper.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/mapper/knowledge/KnowledgeFragmentMapper.java index b99ad6af..304bb7d5 100644 --- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/mapper/knowledge/KnowledgeFragmentMapper.java +++ b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/mapper/knowledge/KnowledgeFragmentMapper.java @@ -33,4 +33,13 @@ public interface KnowledgeFragmentMapper extends BaseMapperPlus") List selectFragmentCountByDocIds(@Param("docIds") List docIds); + @Select("") + List searchByKeyword(@Param("knowledgeId") Long knowledgeId, @Param("query") String query, @Param("limit") Integer limit); } diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/service/knowledge/impl/KnowledgeAttachServiceImpl.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/service/knowledge/impl/KnowledgeAttachServiceImpl.java index e8260f25..28b3d90f 100644 --- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/service/knowledge/impl/KnowledgeAttachServiceImpl.java +++ b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/service/knowledge/impl/KnowledgeAttachServiceImpl.java @@ -187,6 +187,7 @@ public class KnowledgeAttachServiceImpl implements IKnowledgeAttachService { String fid = RandomUtil.randomString(10); fids.add(fid); KnowledgeFragment knowledgeFragment = new KnowledgeFragment(); + knowledgeFragment.setKnowledgeId(knowledgeId); knowledgeFragment.setDocId(docId); knowledgeFragment.setIdx(i); knowledgeFragment.setContent(chunkList.get(i)); diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/service/knowledge/impl/KnowledgeFragmentServiceImpl.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/service/knowledge/impl/KnowledgeFragmentServiceImpl.java index 68ccf0bf..bbe56794 100644 --- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/service/knowledge/impl/KnowledgeFragmentServiceImpl.java +++ b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/service/knowledge/impl/KnowledgeFragmentServiceImpl.java @@ -28,6 +28,8 @@ import org.ruoyi.service.vector.VectorStoreService; import org.springframework.stereotype.Service; import java.util.ArrayList; import java.util.stream.Collectors; +import java.util.*; +import java.util.concurrent.CompletableFuture; import java.util.List; import java.util.Map; @@ -180,8 +182,47 @@ public class KnowledgeFragmentServiceImpl implements IKnowledgeFragmentService { queryVectorBo.setApiKey(chatModel.getApiKey()); queryVectorBo.setBaseUrl(chatModel.getApiHost()); - // 3. 执行物理检索 - List allResults = vectorStoreService.search(queryVectorBo); + // 3. 执行搜索 (向量搜索 + 关键词搜索) + List allResults; + + boolean hybridEnabled = Boolean.TRUE.equals(bo.getEnableHybrid()) || + Integer.valueOf(1).equals(knowledgeInfoVo.getEnableHybrid()); + + if (hybridEnabled) { + log.info("执行混合检索: kid={}, query={}", bo.getKnowledgeId(), bo.getQuery()); + try { + // 并行执行向量搜索 + CompletableFuture> vectorFuture = CompletableFuture.supplyAsync(() -> + vectorStoreService.search(queryVectorBo)); + + // 执行关键词搜索 (MySQL) + int limit = bo.getTopK() != null ? bo.getTopK() : 50; + List keywordFragments = baseMapper.searchByKeyword(bo.getKnowledgeId(), bo.getQuery(), limit); + List keywordResults = keywordFragments.stream().map(f -> { + KnowledgeRetrievalVo vo = new KnowledgeRetrievalVo(); + vo.setId(f.getId().toString()); + vo.setContent(f.getContent()); + vo.setDocId(f.getDocId()); + vo.setIdx(f.getIdx()); + vo.setKnowledgeId(f.getKnowledgeId()); + vo.setScore(10.0); // 初始分,后续由 RRF 重新打分 + return vo; + }).collect(Collectors.toList()); + + List vectorResults = vectorFuture.get(); + log.info("抽取混合结果成功: Vector命中={}条, Keyword命中={}条", vectorResults.size(), keywordResults.size()); + + double alpha = bo.getHybridAlpha() != null ? bo.getHybridAlpha() : + (knowledgeInfoVo.getHybridAlpha() != null ? knowledgeInfoVo.getHybridAlpha() : 0.5); + + allResults = calculateRRF(vectorResults, keywordResults, alpha); + } catch (Exception e) { + log.error("混合检索执行或合并失败,已自动降级回退到纯向量检索", e); + allResults = vectorStoreService.search(queryVectorBo); + } + } else { + allResults = vectorStoreService.search(queryVectorBo); + } // 初始化原始排名 for (int i = 0; i < allResults.size(); i++) { @@ -230,4 +271,51 @@ public class KnowledgeFragmentServiceImpl implements IKnowledgeFragmentService { .filter(res -> res.getScore() >= threshold) .collect(Collectors.toList()); } + + /** + * RRF (Reciprocal Rank Fusion) 融合算法 + * 公式: Score = (1-alpha) * (1 / (k + rank_vector)) + alpha * (1 / (k + rank_keyword)) + */ + private List calculateRRF(List vectorList, List keywordList, double alpha) { + Map allMap = new HashMap<>(); + Map vectorScores = new HashMap<>(); + Map keywordScores = new HashMap<>(); + + int k = 60; // 常用 RRF 常数 + + for (int i = 0; i < vectorList.size(); i++) { + KnowledgeRetrievalVo vo = vectorList.get(i); + allMap.put(vo.getId(), vo); + vectorScores.put(vo.getId(), 1.0 / (k + i + 1)); + } + + for (int i = 0; i < keywordList.size(); i++) { + KnowledgeRetrievalVo vo = keywordList.get(i); + if (!allMap.containsKey(vo.getId())) { + allMap.put(vo.getId(), vo); + } + keywordScores.put(vo.getId(), 1.0 / (k + i + 1)); + } + + // 重新计算得分 + List fusedResults = new ArrayList<>(); + for (Map.Entry entry : allMap.entrySet()) { + String id = entry.getKey(); + double vScore = vectorScores.getOrDefault(id, 0.0); + double kScore = keywordScores.getOrDefault(id, 0.0); + + // 混合分值 + double finalScore = (1 - alpha) * vScore + alpha * kScore; + + // 分值归一化/缩放:将 RRF 分值放大到 0-1 范围 + // 理论单路最大得分为 1/61 ≈ 0.016,乘以 60 使其处于相似度常用区间 + KnowledgeRetrievalVo vo = entry.getValue(); + vo.setScore(finalScore * 60.0); + fusedResults.add(vo); + } + + // 按融合分数从高到低排序 + fusedResults.sort((a, b) -> b.getScore().compareTo(a.getScore())); + return fusedResults; + } }