diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/loader/ExcelFileLoader.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/loader/ExcelFileLoader.java new file mode 100644 index 00000000..b47ce11f --- /dev/null +++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/loader/ExcelFileLoader.java @@ -0,0 +1,41 @@ +package org.ruoyi.chain.loader; + +import dev.langchain4j.data.document.Document; +import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser; +import lombok.AllArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.ruoyi.chain.split.TextSplitter; +import org.ruoyi.common.core.exception.UtilException; +import org.springframework.stereotype.Component; + +import java.io.BufferedInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.List; +@Component +@AllArgsConstructor +@Slf4j +public class ExcelFileLoader implements ResourceLoader { + private final TextSplitter textSplitter; + private static final int DEFAULT_BUFFER_SIZE = 8192; + @Override + public String getContent(InputStream inputStream) { + // 使用带缓冲的输入流包装(保持原流不自动关闭) + try (InputStream bufferedStream = new BufferedInputStream(inputStream, DEFAULT_BUFFER_SIZE)) { + ApacheTikaDocumentParser apacheTikaDocumentParser = new ApacheTikaDocumentParser(); + Document document = apacheTikaDocumentParser.parse(bufferedStream); + return document.text(); + } catch (IOException e) { + String errorMsg = "Excel文件流读取失败"; + throw new UtilException(errorMsg, e); + } catch (RuntimeException e) { + String errorMsg = "Excel内容解析异常"; + throw new UtilException(errorMsg, e); + } + } + + @Override + public List getChunkList(String content, String kid) { + return textSplitter.split(content, kid); + } +}