mirror of
https://gitcode.com/ageerle/ruoyi-ai.git
synced 2026-04-03 23:16:12 +00:00
fix: 修复文件类型匹配和知识库切割配置问题
1. 修复 ResourceLoaderFactory 文件扩展名匹配问题
   - 去除扩展名前导点,确保 .pdf 能正确匹配 PDF 解析器
   - 修复 PDF/Word/Excel 等文件走错解析逻辑的问题
2. 优化文本切割器动态配置
   - CharacterTextSplitter 和 ExcelTextSplitter 支持从知识库读取配置
   - 根据 kid 查询 separator、textBlockSize、overlapChar
   - 查询失败时降级使用默认配置

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
package org.ruoyi.factory;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
import org.ruoyi.constant.FileTypeConstants;
|
||||
import org.ruoyi.service.knowledge.ResourceLoader;
|
||||
import org.ruoyi.service.knowledge.impl.loader.*;
|
||||
@@ -16,6 +17,7 @@ public class ResourceLoaderFactory {
|
||||
private final ExcelTextSplitter excelTextSplitter;
|
||||
|
||||
public ResourceLoader getLoaderByFileType(String fileType) {
|
||||
fileType = StringUtils.removeStart(fileType, ".");
|
||||
if (FileTypeConstants.isTextFile(fileType)) {
|
||||
return new TextFileLoader(characterTextSplitter);
|
||||
} else if (FileTypeConstants.isWord(fileType)) {
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
package org.ruoyi.service.knowledge.impl.split;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.ruoyi.common.core.utils.StringUtils;
|
||||
import org.ruoyi.domain.vo.knowledge.KnowledgeInfoVo;
|
||||
import org.ruoyi.service.knowledge.IKnowledgeInfoService;
|
||||
import org.ruoyi.service.knowledge.TextSplitter;
|
||||
import org.springframework.context.annotation.Primary;
|
||||
import org.springframework.stereotype.Component;
|
||||
@@ -13,14 +16,37 @@ import java.util.List;
|
||||
@Component
|
||||
@Slf4j
|
||||
@Primary
|
||||
@AllArgsConstructor
|
||||
public class CharacterTextSplitter implements TextSplitter {
|
||||
|
||||
private final IKnowledgeInfoService knowledgeInfoService;
|
||||
|
||||
@Override
|
||||
public List<String> split(String content, String kid) {
|
||||
// 使用默认配置
|
||||
// 默认配置值
|
||||
String knowledgeSeparator = "#";
|
||||
int textBlockSize = 10000;
|
||||
int overlapChar = 500;
|
||||
int textBlockSize = 1000;
|
||||
int overlapChar = 50;
|
||||
|
||||
// 根据知识库ID查询配置,覆盖默认值
|
||||
if (StringUtils.isNotBlank(kid)) {
|
||||
try {
|
||||
KnowledgeInfoVo info = knowledgeInfoService.queryById(Long.parseLong(kid));
|
||||
if (info != null) {
|
||||
if (StringUtils.isNotBlank(info.getSeparator())) {
|
||||
knowledgeSeparator = info.getSeparator();
|
||||
}
|
||||
if (info.getTextBlockSize() != null && info.getTextBlockSize() > 0) {
|
||||
textBlockSize = info.getTextBlockSize().intValue();
|
||||
}
|
||||
if (info.getOverlapChar() != null && info.getOverlapChar() > 0) {
|
||||
overlapChar = info.getOverlapChar().intValue();
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.warn("查询知识库配置失败,使用默认配置, kid={}", kid, e);
|
||||
}
|
||||
}
|
||||
|
||||
List<String> chunkList = new ArrayList<>();
|
||||
if (content.contains(knowledgeSeparator) && StringUtils.isNotBlank(knowledgeSeparator)) {
|
||||
|
||||
@@ -3,6 +3,8 @@ package org.ruoyi.service.knowledge.impl.split;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.ruoyi.common.core.utils.StringUtils;
|
||||
import org.ruoyi.domain.vo.knowledge.KnowledgeInfoVo;
|
||||
import org.ruoyi.service.knowledge.IKnowledgeInfoService;
|
||||
import org.ruoyi.service.knowledge.TextSplitter;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@@ -15,13 +17,34 @@ import java.util.List;
|
||||
@Slf4j
|
||||
public class ExcelTextSplitter implements TextSplitter {
|
||||
|
||||
private final IKnowledgeInfoService knowledgeInfoService;
|
||||
|
||||
@Override
|
||||
public List<String> split(String content, String kid) {
|
||||
// 使用默认配置
|
||||
// 默认配置
|
||||
String knowledgeSeparator = "#";
|
||||
int textBlockSize = 10000;
|
||||
int overlapChar = 500;
|
||||
int textBlockSize = 1000;
|
||||
int overlapChar = 50;
|
||||
|
||||
// 根据知识库ID查询配置,覆盖默认值
|
||||
if (StringUtils.isNotBlank(kid)) {
|
||||
try {
|
||||
KnowledgeInfoVo info = knowledgeInfoService.queryById(Long.parseLong(kid));
|
||||
if (info != null) {
|
||||
if (StringUtils.isNotBlank(info.getSeparator())) {
|
||||
knowledgeSeparator = info.getSeparator();
|
||||
}
|
||||
if (info.getTextBlockSize() != null && info.getTextBlockSize() > 0) {
|
||||
textBlockSize = info.getTextBlockSize().intValue();
|
||||
}
|
||||
if (info.getOverlapChar() != null && info.getOverlapChar() > 0) {
|
||||
overlapChar = info.getOverlapChar().intValue();
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.warn("查询知识库配置失败,使用默认配置, kid={}", kid, e);
|
||||
}
|
||||
}
|
||||
List<String> chunkList = new ArrayList<>();
|
||||
if (content.contains(knowledgeSeparator) && StringUtils.isNotBlank(knowledgeSeparator)) {
|
||||
// 按自定义分隔符切分
|
||||
|
||||
Reference in New Issue
Block a user