diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/factory/ResourceLoaderFactory.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/factory/ResourceLoaderFactory.java index a03b4fd1..de293426 100644 --- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/factory/ResourceLoaderFactory.java +++ b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/factory/ResourceLoaderFactory.java @@ -1,6 +1,7 @@ package org.ruoyi.factory; import lombok.AllArgsConstructor; +import org.apache.commons.lang3.StringUtils; import org.ruoyi.constant.FileTypeConstants; import org.ruoyi.service.knowledge.ResourceLoader; import org.ruoyi.service.knowledge.impl.loader.*; @@ -16,6 +17,7 @@ public class ResourceLoaderFactory { private final ExcelTextSplitter excelTextSplitter; public ResourceLoader getLoaderByFileType(String fileType) { + fileType = StringUtils.removeStart(fileType, "."); if (FileTypeConstants.isTextFile(fileType)) { return new TextFileLoader(characterTextSplitter); } else if (FileTypeConstants.isWord(fileType)) { diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/service/knowledge/impl/split/CharacterTextSplitter.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/service/knowledge/impl/split/CharacterTextSplitter.java index a345b865..107f0dc1 100644 --- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/service/knowledge/impl/split/CharacterTextSplitter.java +++ b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/service/knowledge/impl/split/CharacterTextSplitter.java @@ -1,7 +1,10 @@ package org.ruoyi.service.knowledge.impl.split; +import lombok.AllArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.ruoyi.common.core.utils.StringUtils; +import org.ruoyi.domain.vo.knowledge.KnowledgeInfoVo; +import org.ruoyi.service.knowledge.IKnowledgeInfoService; import org.ruoyi.service.knowledge.TextSplitter; import org.springframework.context.annotation.Primary; import org.springframework.stereotype.Component; @@ -13,14 +16,37 @@ import java.util.List; @Component @Slf4j @Primary +@AllArgsConstructor public class CharacterTextSplitter implements TextSplitter { + private final IKnowledgeInfoService knowledgeInfoService; + @Override public List split(String content, String kid) { - // 使用默认配置 + // 默认配置值 String knowledgeSeparator = "#"; - int textBlockSize = 10000; - int overlapChar = 500; + int textBlockSize = 1000; + int overlapChar = 50; + + // 根据知识库ID查询配置,覆盖默认值 + if (StringUtils.isNotBlank(kid)) { + try { + KnowledgeInfoVo info = knowledgeInfoService.queryById(Long.parseLong(kid)); + if (info != null) { + if (StringUtils.isNotBlank(info.getSeparator())) { + knowledgeSeparator = info.getSeparator(); + } + if (info.getTextBlockSize() != null && info.getTextBlockSize() > 0) { + textBlockSize = info.getTextBlockSize().intValue(); + } + if (info.getOverlapChar() != null && info.getOverlapChar() > 0) { + overlapChar = info.getOverlapChar().intValue(); + } + } + } catch (Exception e) { + log.warn("查询知识库配置失败,使用默认配置, kid={}", kid, e); + } + } List chunkList = new ArrayList<>(); if (content.contains(knowledgeSeparator) && StringUtils.isNotBlank(knowledgeSeparator)) { diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/service/knowledge/impl/split/ExcelTextSplitter.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/service/knowledge/impl/split/ExcelTextSplitter.java index 0c242b20..1b610107 100644 --- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/service/knowledge/impl/split/ExcelTextSplitter.java +++ b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/service/knowledge/impl/split/ExcelTextSplitter.java @@ -3,6 +3,8 @@ package org.ruoyi.service.knowledge.impl.split; import lombok.AllArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.ruoyi.common.core.utils.StringUtils; +import org.ruoyi.domain.vo.knowledge.KnowledgeInfoVo; +import org.ruoyi.service.knowledge.IKnowledgeInfoService; import org.ruoyi.service.knowledge.TextSplitter; import org.springframework.stereotype.Component; @@ -15,13 +17,34 @@ import java.util.List; @Slf4j public class ExcelTextSplitter implements TextSplitter { + private final IKnowledgeInfoService knowledgeInfoService; @Override public List split(String content, String kid) { - // 使用默认配置 + // 默认配置 String knowledgeSeparator = "#"; - int textBlockSize = 10000; - int overlapChar = 500; + int textBlockSize = 1000; + int overlapChar = 50; + + // 根据知识库ID查询配置,覆盖默认值 + if (StringUtils.isNotBlank(kid)) { + try { + KnowledgeInfoVo info = knowledgeInfoService.queryById(Long.parseLong(kid)); + if (info != null) { + if (StringUtils.isNotBlank(info.getSeparator())) { + knowledgeSeparator = info.getSeparator(); + } + if (info.getTextBlockSize() != null && info.getTextBlockSize() > 0) { + textBlockSize = info.getTextBlockSize().intValue(); + } + if (info.getOverlapChar() != null && info.getOverlapChar() > 0) { + overlapChar = info.getOverlapChar().intValue(); + } + } + } catch (Exception e) { + log.warn("查询知识库配置失败,使用默认配置, kid={}", kid, e); + } + } List chunkList = new ArrayList<>(); if (content.contains(knowledgeSeparator) && StringUtils.isNotBlank(knowledgeSeparator)) { // 按自定义分隔符切分