恢复微信模块,优化知识库切片功能

This commit is contained in:
ageerle
2025-09-19 14:50:02 +08:00
parent afc1272ff5
commit 6462752fd6
6 changed files with 65 additions and 452 deletions

View File

@@ -1,17 +1,59 @@
package org.ruoyi.chain.split;
import jakarta.annotation.Resource;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.ruoyi.common.core.utils.StringUtils;
import org.ruoyi.domain.vo.KnowledgeInfoVo;
import org.ruoyi.service.IKnowledgeInfoService;
import org.springframework.context.annotation.Lazy;
import org.springframework.stereotype.Component;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * {@link TextSplitter} implementation that cuts text content into chunks.
 *
 * <p>Two strategies are used, chosen per call:
 * <ul>
 *   <li>If the content contains {@link #KNOWLEDGE_SEPARATOR}, the content is split on that
 *       separator and each piece becomes one chunk.</li>
 *   <li>Otherwise the content is cut into fixed-size windows of {@link #TEXT_BLOCK_SIZE}
 *       characters, each widened by up to {@link #OVERLAP_CHAR} characters on both sides so
 *       that neighbouring chunks overlap.</li>
 * </ul>
 */
@Component
@AllArgsConstructor
@Slf4j
public class ExcelTextSplitter implements TextSplitter {

    /** Default separator; when it occurs in the content it marks the chunk boundaries. */
    private static final String KNOWLEDGE_SEPARATOR = "#";

    /** Default number of characters by which each fixed-size window advances. */
    private static final int TEXT_BLOCK_SIZE = 10000;

    /** Default number of extra characters added before and after each fixed-size window. */
    private static final int OVERLAP_CHAR = 500;

    /**
     * Splits {@code content} into chunks using the default configuration.
     *
     * @param content the text to split; {@code null} or empty yields an empty list
     * @param kid     not used by this implementation
     * @return a new mutable list of chunks, never {@code null}
     */
    @Override
    public List<String> split(String content, String kid) {
        List<String> chunkList = new ArrayList<>();
        if (content == null || content.isEmpty()) {
            return chunkList;
        }

        // Split on the separator when it is configured and actually occurs in the content.
        // String.split treats its argument as a regular expression; "#" has no special
        // meaning there, so it is matched literally.
        if (StringUtils.isNotBlank(KNOWLEDGE_SEPARATOR) && content.contains(KNOWLEDGE_SEPARATOR)) {
            chunkList.addAll(Arrays.asList(content.split(KNOWLEDGE_SEPARATOR)));
            return chunkList;
        }

        // Fixed-size windows: window k nominally covers [k * size, (k + 1) * size) and is
        // widened by the overlap on each side, clamped to the bounds of the content.
        int len = content.length();
        for (int start = 0; start < len; start += TEXT_BLOCK_SIZE) {
            int begin = Math.max(0, start - OVERLAP_CHAR);
            int end = Math.min(len, start + TEXT_BLOCK_SIZE + OVERLAP_CHAR);
            chunkList.add(content.substring(begin, end));
        }
        return chunkList;
    }
}