mirror of
https://gitcode.com/ageerle/ruoyi-ai.git
synced 2026-04-14 12:23:39 +00:00
Merge remote-tracking branch 'origin/main'
This commit is contained in:
@@ -103,6 +103,18 @@
|
|||||||
<version>1.19.6</version>
|
<version>1.19.6</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>dev.langchain4j</groupId>
|
||||||
|
<artifactId>langchain4j-document-parser-apache-tika</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<!-- ruoyi-knowledge-api/pom.xml -->
|
||||||
|
<dependency>
|
||||||
|
<groupId>commons-io</groupId>
|
||||||
|
<artifactId>commons-io</artifactId>
|
||||||
|
<version>2.17.0</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
</project>
|
</project>
|
||||||
|
|||||||
@@ -0,0 +1,41 @@
|
|||||||
|
package org.ruoyi.chain.loader;
|
||||||
|
|
||||||
|
import dev.langchain4j.data.document.Document;
|
||||||
|
import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser;
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.ruoyi.chain.split.TextSplitter;
|
||||||
|
import org.ruoyi.common.core.exception.UtilException;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import java.io.BufferedInputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.util.List;
|
||||||
|
@Component
|
||||||
|
@AllArgsConstructor
|
||||||
|
@Slf4j
|
||||||
|
public class ExcelFileLoader implements ResourceLoader {
|
||||||
|
private final TextSplitter textSplitter;
|
||||||
|
private static final int DEFAULT_BUFFER_SIZE = 8192;
|
||||||
|
@Override
|
||||||
|
public String getContent(InputStream inputStream) {
|
||||||
|
// 使用带缓冲的输入流包装(保持原流不自动关闭)
|
||||||
|
try (InputStream bufferedStream = new BufferedInputStream(inputStream, DEFAULT_BUFFER_SIZE)) {
|
||||||
|
ApacheTikaDocumentParser apacheTikaDocumentParser = new ApacheTikaDocumentParser();
|
||||||
|
Document document = apacheTikaDocumentParser.parse(bufferedStream);
|
||||||
|
return document.text();
|
||||||
|
} catch (IOException e) {
|
||||||
|
String errorMsg = "Excel文件流读取失败";
|
||||||
|
throw new UtilException(errorMsg, e);
|
||||||
|
} catch (RuntimeException e) {
|
||||||
|
String errorMsg = "Excel内容解析异常";
|
||||||
|
throw new UtilException(errorMsg, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<String> getChunkList(String content, String kid) {
|
||||||
|
return textSplitter.split(content, kid);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,10 +1,7 @@
|
|||||||
package org.ruoyi.chain.loader;
|
package org.ruoyi.chain.loader;
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import org.ruoyi.chain.split.CharacterTextSplitter;
|
import org.ruoyi.chain.split.*;
|
||||||
import org.ruoyi.chain.split.CodeTextSplitter;
|
|
||||||
import org.ruoyi.chain.split.MarkdownTextSplitter;
|
|
||||||
import org.ruoyi.chain.split.TokenTextSplitter;
|
|
||||||
|
|
||||||
import org.ruoyi.constant.FileType;
|
import org.ruoyi.constant.FileType;
|
||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
@@ -16,6 +13,8 @@ public class ResourceLoaderFactory {
|
|||||||
private final CodeTextSplitter codeTextSplitter;
|
private final CodeTextSplitter codeTextSplitter;
|
||||||
private final MarkdownTextSplitter markdownTextSplitter;
|
private final MarkdownTextSplitter markdownTextSplitter;
|
||||||
private final TokenTextSplitter tokenTextSplitter;
|
private final TokenTextSplitter tokenTextSplitter;
|
||||||
|
private final ExcelTextSplitter excelTextSplitter;
|
||||||
|
|
||||||
public ResourceLoader getLoaderByFileType(String fileType){
|
public ResourceLoader getLoaderByFileType(String fileType){
|
||||||
if (FileType.isTextFile(fileType)){
|
if (FileType.isTextFile(fileType)){
|
||||||
return new TextFileLoader(characterTextSplitter);
|
return new TextFileLoader(characterTextSplitter);
|
||||||
@@ -25,6 +24,8 @@ public class ResourceLoaderFactory {
|
|||||||
return new PdfFileLoader(characterTextSplitter);
|
return new PdfFileLoader(characterTextSplitter);
|
||||||
} else if (FileType.isMdFile(fileType)) {
|
} else if (FileType.isMdFile(fileType)) {
|
||||||
return new MarkDownFileLoader(markdownTextSplitter);
|
return new MarkDownFileLoader(markdownTextSplitter);
|
||||||
|
}else if (FileType.isExcel(fileType)) {
|
||||||
|
return new ExcelFileLoader(excelTextSplitter);
|
||||||
}else if (FileType.isCodeFile(fileType)) {
|
}else if (FileType.isCodeFile(fileType)) {
|
||||||
return new CodeFileLoader(codeTextSplitter);
|
return new CodeFileLoader(codeTextSplitter);
|
||||||
}else {
|
}else {
|
||||||
|
|||||||
@@ -0,0 +1,17 @@
|
|||||||
|
package org.ruoyi.chain.split;
|
||||||
|
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
@Component
|
||||||
|
@AllArgsConstructor
|
||||||
|
@Slf4j
|
||||||
|
public class ExcelTextSplitter implements TextSplitter{
|
||||||
|
@Override
|
||||||
|
public List<String> split(String content, String kid) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -7,6 +7,8 @@ public class FileType {
|
|||||||
public static final String DOC = "doc";
|
public static final String DOC = "doc";
|
||||||
public static final String DOCX = "docx";
|
public static final String DOCX = "docx";
|
||||||
public static final String PDF = "pdf";
|
public static final String PDF = "pdf";
|
||||||
|
public static final String XLS = "xls";
|
||||||
|
public static final String XLSX = "xlsx";
|
||||||
|
|
||||||
public static final String LOG = "log";
|
public static final String LOG = "log";
|
||||||
public static final String XML = "xml";
|
public static final String XML = "xml";
|
||||||
@@ -88,4 +90,13 @@ public class FileType {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static boolean isExcel(String type){
|
||||||
|
if (type.equalsIgnoreCase(XLS) || type.equalsIgnoreCase(XLSX)){
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
|
|||||||
import dev.langchain4j.store.embedding.EmbeddingStore;
|
import dev.langchain4j.store.embedding.EmbeddingStore;
|
||||||
import dev.langchain4j.store.embedding.filter.Filter;
|
import dev.langchain4j.store.embedding.filter.Filter;
|
||||||
import dev.langchain4j.store.embedding.filter.comparison.IsEqualTo;
|
import dev.langchain4j.store.embedding.filter.comparison.IsEqualTo;
|
||||||
|
import dev.langchain4j.store.embedding.inmemory.InMemoryEmbeddingStore;
|
||||||
import dev.langchain4j.store.embedding.milvus.MilvusEmbeddingStore;
|
import dev.langchain4j.store.embedding.milvus.MilvusEmbeddingStore;
|
||||||
import dev.langchain4j.store.embedding.qdrant.QdrantEmbeddingStore;
|
import dev.langchain4j.store.embedding.qdrant.QdrantEmbeddingStore;
|
||||||
import dev.langchain4j.store.embedding.weaviate.WeaviateEmbeddingStore;
|
import dev.langchain4j.store.embedding.weaviate.WeaviateEmbeddingStore;
|
||||||
@@ -39,11 +40,11 @@ public class VectorStoreServiceImpl implements VectorStoreService {
|
|||||||
|
|
||||||
private final ConfigService configService;
|
private final ConfigService configService;
|
||||||
|
|
||||||
Map<String,EmbeddingStore<TextSegment>> storeMap;
|
Map<String,EmbeddingStore<TextSegment>> storeMap = new HashMap<>();
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void createSchema(String kid,String modelName) {
|
public void createSchema(String kid,String modelName) {
|
||||||
EmbeddingStore<TextSegment> embeddingStore = WeaviateEmbeddingStore.builder().build();
|
EmbeddingStore<TextSegment> embeddingStore;
|
||||||
switch (modelName) {
|
switch (modelName) {
|
||||||
case "weaviate" -> {
|
case "weaviate" -> {
|
||||||
String protocol = configService.getConfigValue("weaviate", "protocol");
|
String protocol = configService.getConfigValue("weaviate", "protocol");
|
||||||
@@ -78,6 +79,10 @@ public class VectorStoreServiceImpl implements VectorStoreService {
|
|||||||
.collectionName(collectionName)
|
.collectionName(collectionName)
|
||||||
.build();
|
.build();
|
||||||
}
|
}
|
||||||
|
default -> {
|
||||||
|
//使用内存
|
||||||
|
embeddingStore = new InMemoryEmbeddingStore<>();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
storeMap.put(kid,embeddingStore);
|
storeMap.put(kid,embeddingStore);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user