From fb492d41f3b1f085c6a5997e9c1356c329d892d2 Mon Sep 17 00:00:00 2001 From: zhangpengxiang Date: Sun, 11 May 2025 10:55:33 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0Excel=E6=96=87=E6=A1=A3?= =?UTF-8?q?=E5=8A=A0=E8=BD=BD=E5=8A=9F=E8=83=BD=EF=BC=8C=E5=A4=84=E7=90=86?= =?UTF-8?q?=E5=90=91=E9=87=8F=E5=BA=93=E7=AE=A1=E7=90=86createSchema?= =?UTF-8?q?=E6=96=B9=E6=B3=95WeaviateEmbeddingStore.builder().build();?= =?UTF-8?q?=E7=9B=B4=E6=8E=A5=E6=9E=84=E5=BB=BA=E6=8A=A5=E9=94=99=E9=97=AE?= =?UTF-8?q?=E9=A2=98=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ruoyi-modules-api/ruoyi-knowledge-api/pom.xml | 12 ++++++++++++ .../chain/loader/ResourceLoaderFactory.java | 9 +++++---- .../ruoyi/chain/split/ExcelTextSplitter.java | 17 +++++++++++++++++ .../main/java/org/ruoyi/constant/FileType.java | 11 +++++++++++ .../service/impl/VectorStoreServiceImpl.java | 9 +++++++-- 5 files changed, 52 insertions(+), 6 deletions(-) create mode 100644 ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/split/ExcelTextSplitter.java diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/pom.xml b/ruoyi-modules-api/ruoyi-knowledge-api/pom.xml index f6412e1b..83eac39a 100644 --- a/ruoyi-modules-api/ruoyi-knowledge-api/pom.xml +++ b/ruoyi-modules-api/ruoyi-knowledge-api/pom.xml @@ -103,6 +103,18 @@ 1.19.6 + + dev.langchain4j + langchain4j-document-parser-apache-tika + + + + + commons-io + commons-io + 2.17.0 + + diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/loader/ResourceLoaderFactory.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/loader/ResourceLoaderFactory.java index aa72d761..ec33c668 100644 --- a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/loader/ResourceLoaderFactory.java +++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/loader/ResourceLoaderFactory.java @@ -1,10 +1,7 @@ package org.ruoyi.chain.loader; import lombok.AllArgsConstructor; -import org.ruoyi.chain.split.CharacterTextSplitter; -import org.ruoyi.chain.split.CodeTextSplitter; -import org.ruoyi.chain.split.MarkdownTextSplitter; -import org.ruoyi.chain.split.TokenTextSplitter; +import org.ruoyi.chain.split.*; import org.ruoyi.constant.FileType; import org.springframework.stereotype.Component; @@ -16,6 +13,8 @@ public class ResourceLoaderFactory { private final CodeTextSplitter codeTextSplitter; private final MarkdownTextSplitter markdownTextSplitter; private final TokenTextSplitter tokenTextSplitter; + private final ExcelTextSplitter excelTextSplitter; + public ResourceLoader getLoaderByFileType(String fileType){ if (FileType.isTextFile(fileType)){ return new TextFileLoader(characterTextSplitter); @@ -25,6 +24,8 @@ public class ResourceLoaderFactory { return new PdfFileLoader(characterTextSplitter); } else if (FileType.isMdFile(fileType)) { return new MarkDownFileLoader(markdownTextSplitter); + }else if (FileType.isExcel(fileType)) { + return new ExcelFileLoader(excelTextSplitter); }else if (FileType.isCodeFile(fileType)) { return new CodeFileLoader(codeTextSplitter); }else { diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/split/ExcelTextSplitter.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/split/ExcelTextSplitter.java new file mode 100644 index 00000000..cc2b5f04 --- /dev/null +++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/split/ExcelTextSplitter.java @@ -0,0 +1,17 @@ +package org.ruoyi.chain.split; + +import lombok.AllArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.springframework.stereotype.Component; + +import java.util.List; + +@Component +@AllArgsConstructor +@Slf4j +public class ExcelTextSplitter implements TextSplitter{ + @Override + public List split(String content, String kid) { + return null; + } +} diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/constant/FileType.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/constant/FileType.java index aa141679..e939508b 100644 --- a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/constant/FileType.java +++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/constant/FileType.java @@ -7,6 +7,8 @@ public class FileType { public static final String DOC = "doc"; public static final String DOCX = "docx"; public static final String PDF = "pdf"; + public static final String XLS = "xls"; + public static final String XLSX = "xlsx"; public static final String LOG = "log"; public static final String XML = "xml"; @@ -88,4 +90,13 @@ public class FileType { } } + public static boolean isExcel(String type){ + if (type.equalsIgnoreCase(XLS) || type.equalsIgnoreCase(XLSX)){ + return true; + } + else { + return false; + } + } + } diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java index 294342a6..d74176a4 100644 --- a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java +++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java @@ -11,6 +11,7 @@ import dev.langchain4j.store.embedding.EmbeddingSearchRequest; import dev.langchain4j.store.embedding.EmbeddingStore; import dev.langchain4j.store.embedding.filter.Filter; import dev.langchain4j.store.embedding.filter.comparison.IsEqualTo; +import dev.langchain4j.store.embedding.inmemory.InMemoryEmbeddingStore; import dev.langchain4j.store.embedding.milvus.MilvusEmbeddingStore; import dev.langchain4j.store.embedding.qdrant.QdrantEmbeddingStore; import dev.langchain4j.store.embedding.weaviate.WeaviateEmbeddingStore; @@ -39,11 +40,11 @@ public class VectorStoreServiceImpl implements VectorStoreService { private final ConfigService configService; - Map> storeMap; + Map> storeMap = new HashMap<>(); @Override public void createSchema(String kid,String modelName) { - EmbeddingStore embeddingStore = WeaviateEmbeddingStore.builder().build(); + EmbeddingStore embeddingStore; switch (modelName) { case "weaviate" -> { String protocol = configService.getConfigValue("weaviate", "protocol"); @@ -78,6 +79,10 @@ public class VectorStoreServiceImpl implements VectorStoreService { .collectionName(collectionName) .build(); } + default -> { + //使用内存 + embeddingStore = new InMemoryEmbeddingStore<>(); + } } storeMap.put(kid,embeddingStore); }