补充Excel文档加载类

This commit is contained in:
zhangpengxiang
2025-05-11 14:50:11 +08:00
parent fb492d41f3
commit 7b3b727c0e

View File

@@ -0,0 +1,41 @@
package org.ruoyi.chain.loader;
import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.ruoyi.chain.split.TextSplitter;
import org.ruoyi.common.core.exception.UtilException;
import org.springframework.stereotype.Component;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
/**
 * {@link ResourceLoader} implementation used for Excel documents.
 *
 * <p>Text extraction is performed by langchain4j's {@link ApacheTikaDocumentParser};
 * chunking of the extracted text is delegated to the injected {@link TextSplitter}.
 */
@Component
@AllArgsConstructor
@Slf4j
public class ExcelFileLoader implements ResourceLoader {

    /** Buffer size, in bytes, of the {@link BufferedInputStream} wrapped around the input. */
    private static final int DEFAULT_BUFFER_SIZE = 8192;

    /** Splitter that turns the extracted text into chunks. */
    private final TextSplitter textSplitter;

    /**
     * Extracts the text content of the document read from the given stream.
     *
     * <p>NOTE(review): the buffered wrapper is the resource managed by the
     * try-with-resources statement, so it is closed when this method exits, and closing
     * it also closes the supplied {@code inputStream}. Callers must not expect to keep
     * reading from the stream afterwards.
     *
     * @param inputStream stream providing the raw document bytes
     * @return the text of the parsed document
     * @throws UtilException if closing the stream fails with an {@link IOException}, or if
     *                       any {@link RuntimeException} is raised while parsing
     */
    @Override
    public String getContent(InputStream inputStream) {
        try (InputStream buffered = new BufferedInputStream(inputStream, DEFAULT_BUFFER_SIZE)) {
            return extractText(buffered);
        } catch (IOException e) {
            throw new UtilException("Excel文件流读取失败", e);
        } catch (RuntimeException e) {
            throw new UtilException("Excel内容解析异常", e);
        }
    }

    /**
     * Splits previously extracted content into chunks by delegating to the
     * {@link TextSplitter}.
     *
     * @param content text to split
     * @param kid     identifier forwarded unchanged to the splitter
     *                (presumably the knowledge-base id; confirm against callers)
     * @return the chunks produced by the splitter
     */
    @Override
    public List<String> getChunkList(String content, String kid) {
        List<String> chunks = textSplitter.split(content, kid);
        return chunks;
    }

    /** Parses the stream with a freshly created Tika parser and returns the document text. */
    private static String extractText(InputStream source) {
        ApacheTikaDocumentParser tikaParser = new ApacheTikaDocumentParser();
        Document parsed = tikaParser.parse(source);
        return parsed.text();
    }
}