mirror of
https://gitcode.com/ageerle/ruoyi-ai.git
synced 2026-03-18 23:23:43 +08:00
补充Excel文档加载类
This commit is contained in:
@@ -0,0 +1,41 @@
|
||||
package org.ruoyi.chain.loader;
|
||||
|
||||
import dev.langchain4j.data.document.Document;
|
||||
import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.ruoyi.chain.split.TextSplitter;
|
||||
import org.ruoyi.common.core.exception.UtilException;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.List;
|
||||
@Component
|
||||
@AllArgsConstructor
|
||||
@Slf4j
|
||||
public class ExcelFileLoader implements ResourceLoader {
|
||||
private final TextSplitter textSplitter;
|
||||
private static final int DEFAULT_BUFFER_SIZE = 8192;
|
||||
@Override
|
||||
public String getContent(InputStream inputStream) {
|
||||
// 使用带缓冲的输入流包装(保持原流不自动关闭)
|
||||
try (InputStream bufferedStream = new BufferedInputStream(inputStream, DEFAULT_BUFFER_SIZE)) {
|
||||
ApacheTikaDocumentParser apacheTikaDocumentParser = new ApacheTikaDocumentParser();
|
||||
Document document = apacheTikaDocumentParser.parse(bufferedStream);
|
||||
return document.text();
|
||||
} catch (IOException e) {
|
||||
String errorMsg = "Excel文件流读取失败";
|
||||
throw new UtilException(errorMsg, e);
|
||||
} catch (RuntimeException e) {
|
||||
String errorMsg = "Excel内容解析异常";
|
||||
throw new UtilException(errorMsg, e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getChunkList(String content, String kid) {
|
||||
return textSplitter.split(content, kid);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user