diff --git a/MILVUS_IMPLEMENTATION_GUIDE.md b/MILVUS_IMPLEMENTATION_GUIDE.md deleted file mode 100644 index 181a52ec..00000000 --- a/MILVUS_IMPLEMENTATION_GUIDE.md +++ /dev/null @@ -1,237 +0,0 @@ -# Milvus向量库实现指南 - -## 概述 - -本项目已完成Milvus向量库的集成,基于Milvus SDK 2.6.4版本实现。Milvus是一个开源的向量数据库,专为AI应用和相似性搜索而设计。 - -## 实现特性 - -### ✅ 已实现功能 - -1. **集合管理** - - 自动创建集合(Collection) - - 检查集合是否存在 - - 删除集合 - -2. **数据存储** - - 批量插入向量数据 - - 支持文本、fid、kid、docId等元数据 - - 自动生成向量嵌入 - -3. **向量搜索** - - 基于相似性的向量搜索 - - 支持TopK结果返回 - - 返回相关文本内容 - -4. **数据删除** - - 按文档ID删除 - - 按片段ID删除 - - 删除整个集合 - -## 架构设计 - -### 策略模式实现 - -``` -AbstractVectorStoreStrategy (抽象基类) - ↓ -MilvusVectorStoreStrategy (Milvus实现) -WeaviateVectorStoreStrategy (Weaviate实现) -``` - -### 核心类说明 - -- **MilvusVectorStoreStrategy**: Milvus向量库策略实现 -- **VectorStoreStrategyFactory**: 向量库策略工厂,支持动态切换 -- **VectorStoreService**: 向量库服务接口 - -## 配置说明 - -### 必需配置项 - -在系统配置中需要设置以下Milvus相关配置: - -```properties -# Milvus服务地址 -milvus.url=http://localhost:19530 - -# 集合名称前缀 -milvus.collectionname=LocalKnowledge - -# 向量库类型选择 -vector.store_type=milvus -``` - -### 集合Schema设计 - -每个集合包含以下字段: - -| 字段名 | 类型 | 说明 | -|--------|------|------| -| id | Int64 | 主键,自动生成 | -| text | VarChar(65535) | 文本内容 | -| fid | VarChar(255) | 片段ID | -| kid | VarChar(255) | 知识库ID | -| docId | VarChar(255) | 文档ID | -| vector | FloatVector(1024) | 向量数据 | - -### 索引配置 - -- **索引类型**: IVF_FLAT -- **距离度量**: L2 (欧几里得距离) -- **参数**: nlist=1024 - -## 使用示例 - -### 1. 创建集合 - -```java -MilvusVectorStoreStrategy strategy = new MilvusVectorStoreStrategy(configService); -strategy.createSchema("bge-large-zh-v1.5", "test001", "test-model"); -``` - -### 2. 
存储向量数据 - -```java -StoreEmbeddingBo storeEmbeddingBo = new StoreEmbeddingBo(); -storeEmbeddingBo.setVectorModelName("bge-large-zh-v1.5"); -storeEmbeddingBo.setKid("test001"); -storeEmbeddingBo.setDocId("doc001"); -storeEmbeddingBo.setChunkList(Arrays.asList("文本1", "文本2")); -storeEmbeddingBo.setFids(Arrays.asList("fid001", "fid002")); - -strategy.storeEmbeddings(storeEmbeddingBo); -``` - -### 3. 查询向量数据 - -```java -QueryVectorBo queryVectorBo = new QueryVectorBo(); -queryVectorBo.setQuery("查询文本"); -queryVectorBo.setKid("test001"); -queryVectorBo.setMaxResults(5); - -List results = strategy.getQueryVector(queryVectorBo); -``` - -### 4. 删除数据 - -```java -// 按文档ID删除 -strategy.removeByDocId("doc001", "test001"); - -// 按片段ID删除 -strategy.removeByFid("fid001", "test001"); - -// 删除整个集合 -strategy.removeById("test001", "model"); -``` - -## 部署要求 - -### Milvus服务部署 - -1. **Docker部署** (推荐) -```bash -# 下载docker-compose文件 -wget https://github.com/milvus-io/milvus/releases/download/v2.6.4/milvus-standalone-docker-compose.yml -O docker-compose.yml - -# 启动Milvus -docker-compose up -d -``` - -2. **验证部署** -```bash -# 检查服务状态 -docker-compose ps - -# 查看日志 -docker-compose logs milvus-standalone -``` - -### 系统要求 - -- **内存**: 最少8GB,推荐16GB+ -- **存储**: SSD推荐,至少50GB可用空间 -- **CPU**: 4核心以上 -- **网络**: 确保19530端口可访问 - -## 性能优化 - -### 1. 索引优化 - -根据数据量调整索引参数: -- 小数据集(<100万): nlist=1024 -- 中等数据集(100万-1000万): nlist=4096 -- 大数据集(>1000万): nlist=16384 - -### 2. 批量操作 - -- 批量插入:建议每批1000-10000条记录 -- 批量查询:避免频繁的单条查询 - -### 3. 内存管理 - -```yaml -# docker-compose.yml中的内存配置 -environment: - MILVUS_CONFIG_PATH: /milvus/configs/milvus.yaml -volumes: - - ./milvus.yaml:/milvus/configs/milvus.yaml -``` - -## 故障排除 - -### 常见问题 - -1. **连接失败** - - 检查Milvus服务是否启动 - - 验证网络连接和端口 - - 确认配置中的URL正确 - -2. **集合创建失败** - - 检查集合名称是否符合规范 - - 验证字段定义是否正确 - - 查看Milvus日志获取详细错误 - -3. **插入数据失败** - - 确认向量维度与schema一致 - - 检查数据格式是否正确 - - 验证集合是否已加载 - -4. 
**查询无结果** - - 确认集合中有数据 - - 检查查询参数设置 - - 验证向量化模型一致性 - -### 日志调试 - -启用详细日志: -```properties -logging.level.org.ruoyi.service.strategy.impl.MilvusVectorStoreStrategy=DEBUG -logging.level.io.milvus=DEBUG -``` - -## 与Weaviate对比 - -| 特性 | Milvus | Weaviate | -|------|--------|----------| -| 性能 | 高性能,专为大规模设计 | 中等性能 | -| 部署 | 需要独立部署 | 可独立部署或云服务 | -| 生态 | 专注向量搜索 | 集成更多AI功能 | -| 学习成本 | 中等 | 较低 | -| 扩展性 | 优秀 | 良好 | - -## 后续优化建议 - -1. **连接池管理**: 实现MilvusClient连接池 -2. **异步操作**: 支持异步插入和查询 -3. **分片策略**: 大数据集的分片管理 -4. **监控告警**: 集成性能监控 -5. **备份恢复**: 数据备份和恢复机制 - -## 参考资料 - -- [Milvus官方文档](https://milvus.io/docs) -- [Milvus Java SDK](https://github.com/milvus-io/milvus-sdk-java) -- [向量数据库最佳实践](https://milvus.io/docs/performance_faq.md) \ No newline at end of file diff --git a/ruoyi-admin/src/main/resources/application.yml b/ruoyi-admin/src/main/resources/application.yml index 6d5e6d2f..2bb9f120 100644 --- a/ruoyi-admin/src/main/resources/application.yml +++ b/ruoyi-admin/src/main/resources/application.yml @@ -328,3 +328,17 @@ spring: servers-configuration: classpath:mcp-server.json request-timeout: 300s +--- # 向量库配置 +vector-store: + # 向量存储类型 (weaviate/milvus) + type: weaviate + # Weaviate配置 + weaviate: + protocol: http + host: 127.0.0.1:6038 + classname: LocalKnowledge + # Milvus配置 + milvus: + url: http://localhost:19530 + collectionname: LocalKnowledge + diff --git a/ruoyi-common/ruoyi-common-core/src/main/java/org/ruoyi/common/core/config/VectorStoreProperties.java b/ruoyi-common/ruoyi-common-core/src/main/java/org/ruoyi/common/core/config/VectorStoreProperties.java new file mode 100644 index 00000000..98f3ddc4 --- /dev/null +++ b/ruoyi-common/ruoyi-common-core/src/main/java/org/ruoyi/common/core/config/VectorStoreProperties.java @@ -0,0 +1,62 @@ +package org.ruoyi.common.core.config; + +import lombok.Data; +import org.springframework.boot.context.properties.ConfigurationProperties; +import org.springframework.stereotype.Component; + +/** + * 向量库配置属性 + * + * @author ageer + */ 
+@Data +@Component +@ConfigurationProperties(prefix = "vector-store") +public class VectorStoreProperties { + + /** + * Vector store type; defaults to "weaviate" + */ + private String type = "weaviate"; + + /** + * Weaviate settings + */ + private Weaviate weaviate = new Weaviate(); + + /** + * Milvus settings + */ + private Milvus milvus = new Milvus(); + + @Data + public static class Weaviate { + /** + * Protocol + */ + private String protocol = "http"; + + /** + * Host address (the default is written as host:port) + */ + private String host = "localhost:8080"; + + /** + * Class name + */ + private String classname = "Document"; + } + + @Data + public static class Milvus { + /** + * Connection URL + */ + private String url = "http://localhost:19530"; + + /** + * Collection name + */ + private String collectionname = "knowledge_base"; + } +} \ No newline at end of file diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/VectorStoreService.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/VectorStoreService.java index ae0a227d..4e78f6f3 100644 --- a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/VectorStoreService.java +++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/VectorStoreService.java @@ -1,6 +1,6 @@ package org.ruoyi.service; -import com.google.protobuf.ServiceException; +import org.ruoyi.common.core.exception.ServiceException; import org.ruoyi.domain.bo.QueryVectorBo; import org.ruoyi.domain.bo.StoreEmbeddingBo; diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java index e7b023cc..677e4ce3 100644 --- a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java +++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java @@ -2,7 +2,6 @@ package org.ruoyi.service.impl; import lombok.RequiredArgsConstructor; import 
lombok.extern.slf4j.Slf4j; -import org.ruoyi.common.core.service.ConfigService; import org.ruoyi.domain.bo.QueryVectorBo; import org.ruoyi.domain.bo.StoreEmbeddingBo; import org.ruoyi.service.VectorStoreService; @@ -13,7 +12,7 @@ import org.springframework.stereotype.Service; import java.util.List; /** - * 向量库管理服务实现 - 使用策略模式 + * 向量库服务实现 * * @author ageer */ @@ -22,7 +21,6 @@ import java.util.List; @RequiredArgsConstructor public class VectorStoreServiceImpl implements VectorStoreService { - private final ConfigService configService; private final VectorStoreStrategyFactory strategyFactory; @@ -30,11 +28,7 @@ public class VectorStoreServiceImpl implements VectorStoreService { * 获取当前配置的向量库策略 */ private VectorStoreStrategy getCurrentStrategy() { - String vectorStoreType = configService.getConfigValue("vector", "type"); - if (vectorStoreType == null || vectorStoreType.trim().isEmpty()) { - vectorStoreType = "weaviate"; // 默认使用weaviate - } - return strategyFactory.getStrategy(vectorStoreType); + return strategyFactory.getStrategy(); } @Override diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/strategy/AbstractVectorStoreStrategy.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/strategy/AbstractVectorStoreStrategy.java index 104714cb..7fdeb195 100644 --- a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/strategy/AbstractVectorStoreStrategy.java +++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/strategy/AbstractVectorStoreStrategy.java @@ -1,13 +1,13 @@ package org.ruoyi.service.strategy; -import com.google.protobuf.ServiceException; +import org.ruoyi.common.core.exception.ServiceException; import dev.langchain4j.model.embedding.EmbeddingModel; import dev.langchain4j.model.ollama.OllamaEmbeddingModel; import dev.langchain4j.model.openai.OpenAiEmbeddingModel; import lombok.RequiredArgsConstructor; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; 
-import org.ruoyi.common.core.service.ConfigService; +import org.ruoyi.common.core.config.VectorStoreProperties; /** * 向量库策略抽象基类 @@ -19,7 +19,7 @@ import org.ruoyi.common.core.service.ConfigService; @RequiredArgsConstructor public abstract class AbstractVectorStoreStrategy implements VectorStoreStrategy { - protected final ConfigService configService; + protected final VectorStoreProperties vectorStoreProperties; /** * 获取向量模型 diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/strategy/VectorStoreStrategyFactory.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/strategy/VectorStoreStrategyFactory.java index 1e606b22..fbd5b27b 100644 --- a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/strategy/VectorStoreStrategyFactory.java +++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/strategy/VectorStoreStrategyFactory.java @@ -1,15 +1,15 @@ package org.ruoyi.service.strategy; +import jakarta.annotation.PostConstruct; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; -import org.ruoyi.common.core.service.ConfigService; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.context.ApplicationContext; -import org.springframework.context.ApplicationContextAware; +import org.ruoyi.common.core.config.VectorStoreProperties; +import org.ruoyi.service.strategy.impl.MilvusVectorStoreStrategy; +import org.ruoyi.service.strategy.impl.WeaviateVectorStoreStrategy; import org.springframework.stereotype.Component; +import java.util.HashMap; import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; /** * 向量库策略工厂 @@ -20,69 +20,55 @@ import java.util.concurrent.ConcurrentHashMap; @Slf4j @Component @RequiredArgsConstructor -public class VectorStoreStrategyFactory implements ApplicationContextAware { +public class VectorStoreStrategyFactory { - private final ConfigService configService; - private final Map strategyMap 
= new ConcurrentHashMap<>(); - private ApplicationContext applicationContext; + private final VectorStoreProperties vectorStoreProperties; + private final WeaviateVectorStoreStrategy weaviateStrategy; + private final MilvusVectorStoreStrategy milvusStrategy; - @Override - public void setApplicationContext(ApplicationContext applicationContext) { - this.applicationContext = applicationContext; - initStrategies(); - } + private Map<String, VectorStoreStrategy> strategies; /* keyed by lower-case type name; populated in init() */ - /** - * 初始化所有策略实现 - */ - private void initStrategies() { - Map strategies = applicationContext.getBeansOfType(VectorStoreStrategy.class); - for (VectorStoreStrategy strategy : strategies.values()) { - if (strategy instanceof AbstractVectorStoreStrategy) { - AbstractVectorStoreStrategy abstractStrategy = (AbstractVectorStoreStrategy) strategy; - strategyMap.put(abstractStrategy.getVectorStoreType(), strategy); - log.info("注册向量库策略: {}", abstractStrategy.getVectorStoreType()); - } - } + @PostConstruct + public void init() { + strategies = new HashMap<>(); + strategies.put("weaviate", weaviateStrategy); + strategies.put("milvus", milvusStrategy); + log.info("向量库策略工厂初始化完成,支持的策略: {}", strategies.keySet()); } /** * 获取当前配置的向量库策略 */ public VectorStoreStrategy getStrategy() { - String vectorStoreType = configService.getConfigValue("vector", "store_type"); - if (vectorStoreType == null || vectorStoreType.isEmpty()) { + String vectorStoreType = vectorStoreProperties.getType(); + if (vectorStoreType == null || vectorStoreType.trim().isEmpty()) { vectorStoreType = "weaviate"; // 默认使用weaviate } - VectorStoreStrategy strategy = strategyMap.get(vectorStoreType); + VectorStoreStrategy strategy = strategies.get(vectorStoreType.trim().toLowerCase(java.util.Locale.ROOT)); /* trim to match the blank check above; Locale.ROOT keeps the key independent of the default locale */ if (strategy == null) { log.warn("未找到向量库策略: {}, 使用默认策略: weaviate", vectorStoreType); - strategy = strategyMap.get("weaviate"); - } - - if (strategy == null) { - throw new RuntimeException("未找到可用的向量库策略实现"); + strategy = strategies.get("weaviate"); } + log.debug("使用向量库策略: {}", vectorStoreType); return 
strategy; } /** - * 根据类型获取特定的向量库策略 + * Returns the strategy registered for the given type (trimmed, case-insensitive); a null, blank or unknown type falls back to getStrategy() */ - public VectorStoreStrategy getStrategy(String vectorStoreType) { - VectorStoreStrategy strategy = strategyMap.get(vectorStoreType); - if (strategy == null) { - throw new RuntimeException("未找到向量库策略: " + vectorStoreType); + public VectorStoreStrategy getStrategy(String type) { + if (type == null || type.trim().isEmpty()) { + return getStrategy(); } + + VectorStoreStrategy strategy = strategies.get(type.trim().toLowerCase(java.util.Locale.ROOT)); /* same normalisation as getStrategy() */ + if (strategy == null) { + log.warn("未找到向量库策略: {}, 使用默认策略", type); + return getStrategy(); + } + return strategy; } - - /** - * 获取所有可用的向量库类型 - */ - public String[] getAvailableTypes() { - return strategyMap.keySet().toArray(new String[0]); - } } \ No newline at end of file diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/strategy/impl/MilvusVectorStoreStrategy.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/strategy/impl/MilvusVectorStoreStrategy.java index 540eefc8..26a09f1f 100644 --- a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/strategy/impl/MilvusVectorStoreStrategy.java +++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/strategy/impl/MilvusVectorStoreStrategy.java @@ -1,30 +1,23 @@ package org.ruoyi.service.strategy.impl; -import com.google.gson.Gson; -import com.google.gson.JsonObject; -import com.google.protobuf.ServiceException; +import org.ruoyi.common.core.exception.ServiceException; import dev.langchain4j.data.embedding.Embedding; import dev.langchain4j.model.embedding.EmbeddingModel; -import io.milvus.v2.client.ConnectConfig; -import io.milvus.v2.client.MilvusClientV2; -import io.milvus.v2.common.DataType; -import io.milvus.v2.common.IndexParam; -import io.milvus.v2.service.collection.request.AddFieldReq; -import io.milvus.v2.service.collection.request.CreateCollectionReq; -import io.milvus.v2.service.collection.request.DescribeCollectionReq; -import 
io.milvus.v2.service.collection.request.DropCollectionReq; -import io.milvus.v2.service.collection.request.HasCollectionReq; -import io.milvus.v2.service.collection.response.DescribeCollectionResp; -import io.milvus.v2.service.vector.request.DeleteReq; -import io.milvus.v2.service.vector.request.InsertReq; -import io.milvus.v2.service.vector.request.SearchReq; -import io.milvus.v2.service.vector.request.data.BaseVector; -import io.milvus.v2.service.vector.request.data.FloatVec; -import io.milvus.v2.service.vector.response.DeleteResp; -import io.milvus.v2.service.vector.response.InsertResp; -import io.milvus.v2.service.vector.response.SearchResp; +import io.milvus.client.MilvusServiceClient; +import io.milvus.common.clientenum.ConsistencyLevelEnum; +import io.milvus.grpc.*; +import io.milvus.param.*; +import io.milvus.param.collection.*; +import io.milvus.param.dml.DeleteParam; +import io.milvus.param.dml.InsertParam; +import io.milvus.param.dml.SearchParam; +import io.milvus.param.index.CreateIndexParam; +import io.milvus.param.index.DescribeIndexParam; +import io.milvus.response.DescCollResponseWrapper; +import io.milvus.response.SearchResultsWrapper; +import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; -import org.ruoyi.common.core.service.ConfigService; +import org.ruoyi.common.core.config.VectorStoreProperties; import org.ruoyi.domain.bo.QueryVectorBo; import org.ruoyi.domain.bo.StoreEmbeddingBo; import org.ruoyi.service.strategy.AbstractVectorStoreStrategy; @@ -41,105 +34,122 @@ import java.util.*; @Component public class MilvusVectorStoreStrategy extends AbstractVectorStoreStrategy { - private MilvusClientV2 client; - - public MilvusVectorStoreStrategy(ConfigService configService) { - super(configService); + private MilvusServiceClient milvusClient; + + public MilvusVectorStoreStrategy(VectorStoreProperties vectorStoreProperties) { + super(vectorStoreProperties); } @Override public String getVectorStoreType() { return "milvus"; } - + @Override 
public void createSchema(String vectorModelName, String kid, String modelName) { - log.info("Milvus创建schema: vectorModelName={}, kid={}, modelName={}", vectorModelName, kid, modelName); + String url = vectorStoreProperties.getMilvus().getUrl(); + String collectionName = vectorStoreProperties.getMilvus().getCollectionname() + kid; - // 1. 获取Milvus配置 - String host = configService.getConfigValue("milvus", "url"); - String collectionName = configService.getConfigValue("milvus", "collectionname") + kid; - - ConnectConfig config = ConnectConfig.builder() - .uri(host) + // 创建Milvus客户端连接 + ConnectParam connectParam = ConnectParam.newBuilder() + .withUri(url) .build(); - client = new MilvusClientV2(config); + milvusClient = new MilvusServiceClient(connectParam); - // 2. 检查集合是否存在 - HasCollectionReq hasCollectionReq = HasCollectionReq.builder() - .collectionName(collectionName) + // 检查集合是否存在 + HasCollectionParam hasCollectionParam = HasCollectionParam.newBuilder() + .withCollectionName(collectionName) .build(); - Boolean hasCollection = client.hasCollection(hasCollectionReq); + R hasCollectionResponse = milvusClient.hasCollection(hasCollectionParam); + if (hasCollectionResponse.getStatus() != R.Status.Success.getCode()) { + log.error("检查集合是否存在失败: {}", hasCollectionResponse.getMessage()); + return; + } - if (!hasCollection) { - // 3. 
创建集合schema - CreateCollectionReq.CollectionSchema schema = CreateCollectionReq.CollectionSchema.builder() - .build(); + if (!hasCollectionResponse.getData()) { + // 创建字段 + List fields = new ArrayList<>(); - // 添加字段定义 - schema.addField(AddFieldReq.builder() - .fieldName("id") - .dataType(DataType.Int64) - .isPrimaryKey(true) - .autoID(true) + // ID字段 (主键) + fields.add(FieldType.newBuilder() + .withName("id") + .withDataType(DataType.Int64) + .withPrimaryKey(true) + .withAutoID(true) + .build()); + + // 文本字段 + fields.add(FieldType.newBuilder() + .withName("text") + .withDataType(DataType.VarChar) + .withMaxLength(65535) + .build()); + + // fid字段 + fields.add(FieldType.newBuilder() + .withName("fid") + .withDataType(DataType.VarChar) + .withMaxLength(255) + .build()); + + // kid字段 + fields.add(FieldType.newBuilder() + .withName("kid") + .withDataType(DataType.VarChar) + .withMaxLength(255) + .build()); + + // docId字段 + fields.add(FieldType.newBuilder() + .withName("docId") + .withDataType(DataType.VarChar) + .withMaxLength(255) + .build()); + + // 向量字段 + fields.add(FieldType.newBuilder() + .withName("vector") + .withDataType(DataType.FloatVector) + .withDimension(1024) // 根据实际embedding维度调整 .build()); - schema.addField(AddFieldReq.builder() - .fieldName("text") - .dataType(DataType.VarChar) - .maxLength(65535) - .build()); - - schema.addField(AddFieldReq.builder() - .fieldName("fid") - .dataType(DataType.VarChar) - .maxLength(255) - .build()); - - schema.addField(AddFieldReq.builder() - .fieldName("kid") - .dataType(DataType.VarChar) - .maxLength(255) - .build()); - - schema.addField(AddFieldReq.builder() - .fieldName("docId") - .dataType(DataType.VarChar) - .maxLength(255) - .build()); - - schema.addField(AddFieldReq.builder() - .fieldName("vector") - .dataType(DataType.FloatVector) - .dimension(1024) // 根据实际embedding维度调整 - .build()); - - // 4. 
创建索引参数 - List indexParams = new ArrayList<>(); - indexParams.add(IndexParam.builder() - .fieldName("vector") - .indexType(IndexParam.IndexType.IVF_FLAT) - .metricType(IndexParam.MetricType.L2) - .extraParams(Map.of("nlist", 1024)) - .build()); - - // 5. 创建集合 - CreateCollectionReq createCollectionReq = CreateCollectionReq.builder() - .collectionName(collectionName) - .collectionSchema(schema) - .indexParams(indexParams) + // 创建集合 + CreateCollectionParam createCollectionParam = CreateCollectionParam.newBuilder() + .withCollectionName(collectionName) + .withDescription("Knowledge base collection for " + kid) + .withShardsNum(2) + .withFieldTypes(fields) .build(); - client.createCollection(createCollectionReq); - log.info("Milvus集合创建成功: {}", collectionName); + R createCollectionResponse = milvusClient.createCollection(createCollectionParam); + if (createCollectionResponse.getStatus() != R.Status.Success.getCode()) { + log.error("创建集合失败: {}", createCollectionResponse.getMessage()); + return; + } + + // 创建索引 + CreateIndexParam createIndexParam = CreateIndexParam.newBuilder() + .withCollectionName(collectionName) + .withFieldName("vector") + .withIndexType(IndexType.IVF_FLAT) + .withMetricType(MetricType.L2) + .withExtraParam("{\"nlist\":1024}") + .build(); + + R createIndexResponse = milvusClient.createIndex(createIndexParam); + if (createIndexResponse.getStatus() != R.Status.Success.getCode()) { + log.error("创建索引失败: {}", createIndexResponse.getMessage()); + } else { + log.info("Milvus集合和索引创建成功: {}", collectionName); + } } else { log.info("Milvus集合已存在: {}", collectionName); } } @Override - public void storeEmbeddings(StoreEmbeddingBo storeEmbeddingBo) throws ServiceException { + public void storeEmbeddings(StoreEmbeddingBo storeEmbeddingBo) { createSchema(storeEmbeddingBo.getVectorModelName(), storeEmbeddingBo.getKid(), storeEmbeddingBo.getVectorModelName()); EmbeddingModel embeddingModel = getEmbeddingModel(storeEmbeddingBo.getEmbeddingModelName(), @@ -149,12 +159,13 @@ 
public class MilvusVectorStoreStrategy extends AbstractVectorStoreStrategy { List fidList = storeEmbeddingBo.getFids(); String kid = storeEmbeddingBo.getKid(); String docId = storeEmbeddingBo.getDocId(); - String collectionName = configService.getConfigValue("milvus", "collectionname") + kid; + String collectionName = vectorStoreProperties.getMilvus().getCollectionname() + kid; log.info("Milvus向量存储条数记录: " + chunkList.size()); long startTime = System.currentTimeMillis(); // 准备批量插入数据 + List fields = new ArrayList<>(); List textList = new ArrayList<>(); List fidListData = new ArrayList<>(); List kidList = new ArrayList<>(); @@ -178,31 +189,25 @@ public class MilvusVectorStoreStrategy extends AbstractVectorStoreStrategy { vectorList.add(vector); } - // 构建插入数据 - List data = new ArrayList<>(); - Gson gson = new Gson(); - for (int i = 0; i < textList.size(); i++) { - JsonObject row = new JsonObject(); - row.addProperty("text", textList.get(i)); - row.addProperty("fid", fidListData.get(i)); - row.addProperty("kid", kidList.get(i)); - row.addProperty("docId", docIdList.get(i)); - row.add("vector", gson.toJsonTree(vectorList.get(i))); - data.add(row); - } + // 构建字段数据 + fields.add(new InsertParam.Field("text", textList)); + fields.add(new InsertParam.Field("fid", fidListData)); + fields.add(new InsertParam.Field("kid", kidList)); + fields.add(new InsertParam.Field("docId", docIdList)); + fields.add(new InsertParam.Field("vector", vectorList)); // 执行插入 - InsertReq insertReq = InsertReq.builder() - .collectionName(collectionName) - .data(data) + InsertParam insertParam = InsertParam.newBuilder() + .withCollectionName(collectionName) + .withFields(fields) .build(); - InsertResp insertResp = client.insert(insertReq); - if (insertResp.getInsertCnt() > 0) { - log.info("Milvus向量存储成功,插入条数: {}", insertResp.getInsertCnt()); - } else { - log.error("Milvus向量存储失败"); + R insertResponse = milvusClient.insert(insertParam); + if (insertResponse.getStatus() != R.Status.Success.getCode()) { + 
log.error("Milvus向量存储失败: {}", insertResponse.getMessage()); throw new ServiceException("Milvus向量存储失败"); + } else { + log.info("Milvus向量存储成功,插入条数: {}", insertResponse.getData().getInsertCnt()); } long endTime = System.currentTimeMillis(); @@ -217,99 +222,116 @@ public class MilvusVectorStoreStrategy extends AbstractVectorStoreStrategy { queryVectorBo.getApiKey(), queryVectorBo.getBaseUrl()); Embedding queryEmbedding = embeddingModel.embed(queryVectorBo.getQuery()).content(); - String collectionName = configService.getConfigValue("milvus", "collectionname") + queryVectorBo.getKid(); + String collectionName = vectorStoreProperties.getMilvus().getCollectionname() + queryVectorBo.getKid(); List resultList = new ArrayList<>(); - // 准备查询向量 - List searchVectors = new ArrayList<>(); - float[] queryVectorArray = new float[queryEmbedding.vector().length]; - for (int i = 0; i < queryEmbedding.vector().length; i++) { - queryVectorArray[i] = queryEmbedding.vector()[i]; - } - searchVectors.add(new FloatVec(queryVectorArray)); + // 加载集合到内存 + LoadCollectionParam loadCollectionParam = LoadCollectionParam.newBuilder() + .withCollectionName(collectionName) + .build(); + milvusClient.loadCollection(loadCollectionParam); - // 构建搜索请求 - SearchReq searchReq = SearchReq.builder() - .collectionName(collectionName) - .data(searchVectors) - .topK(queryVectorBo.getMaxResults()) - .outputFields(Arrays.asList("text", "fid", "kid", "docId")) + // 准备查询向量 + List> searchVectors = new ArrayList<>(); + List queryVector = new ArrayList<>(); + for (float f : queryEmbedding.vector()) { + queryVector.add(f); + } + searchVectors.add(queryVector); + + // 构建搜索参数 + SearchParam searchParam = SearchParam.newBuilder() + .withCollectionName(collectionName) + .withMetricType(MetricType.L2) + .withOutFields(Arrays.asList("text", "fid", "kid", "docId")) + .withTopK(queryVectorBo.getMaxResults()) + .withVectors(searchVectors) + .withVectorFieldName("vector") + .withParams("{\"nprobe\":10}") .build(); - SearchResp 
searchResp = client.search(searchReq); - if (searchResp != null && searchResp.getSearchResults() != null) { - List> searchResults = searchResp.getSearchResults(); + R searchResponse = milvusClient.search(searchParam); + if (searchResponse.getStatus() != R.Status.Success.getCode()) { + log.error("Milvus查询失败: {}", searchResponse.getMessage()); + return resultList; + } + + SearchResultsWrapper wrapper = new SearchResultsWrapper(searchResponse.getData().getResults()); + + // 遍历搜索结果 + for (int i = 0; i < wrapper.getIDScore(0).size(); i++) { + SearchResultsWrapper.IDScore idScore = wrapper.getIDScore(0).get(i); - for (List results : searchResults) { - for (SearchResp.SearchResult result : results) { - Map entity = result.getEntity(); - String text = (String) entity.get("text"); - if (text != null) { - resultList.add(text); - } + // 获取text字段数据 + List textFieldData = wrapper.getFieldData("text", 0); + if (textFieldData != null && i < textFieldData.size()) { + Object textObj = textFieldData.get(i); + if (textObj != null) { + resultList.add(textObj.toString()); + log.debug("找到相似文本,ID: {}, 距离: {}, 内容: {}", + idScore.getLongID(), idScore.getScore(), textObj.toString()); } } - } else { - log.error("Milvus查询失败或无结果"); } return resultList; } @Override - public void removeById(String id, String modelName) throws ServiceException { - String collectionName = configService.getConfigValue("milvus", "collectionname") + id; + @SneakyThrows + public void removeById(String id, String modelName) { + String collectionName = vectorStoreProperties.getMilvus().getCollectionname() + id; // 删除整个集合 - DropCollectionReq dropCollectionReq = DropCollectionReq.builder() - .collectionName(collectionName) + DropCollectionParam dropCollectionParam = DropCollectionParam.newBuilder() + .withCollectionName(collectionName) .build(); - try { - client.dropCollection(dropCollectionReq); + R dropResponse = milvusClient.dropCollection(dropCollectionParam); + if (dropResponse.getStatus() != 
R.Status.Success.getCode()) { + log.error("Milvus集合删除失败: {}", dropResponse.getMessage()); + throw new ServiceException("Milvus集合删除失败"); + } else { log.info("Milvus集合删除成功: {}", collectionName); - } catch (Exception e) { - log.error("Milvus集合删除失败: {}", e.getMessage()); - throw new ServiceException("Milvus集合删除失败: " + e.getMessage()); } } @Override - public void removeByDocId(String docId, String kid) throws ServiceException { - String collectionName = configService.getConfigValue("milvus", "collectionname") + kid; + public void removeByDocId(String docId, String kid) { + String collectionName = vectorStoreProperties.getMilvus().getCollectionname() + kid; String expr = "docId == \"" + docId + "\""; - DeleteReq deleteReq = DeleteReq.builder() - .collectionName(collectionName) - .filter(expr) + DeleteParam deleteParam = DeleteParam.newBuilder() + .withCollectionName(collectionName) + .withExpr(expr) .build(); - try { - DeleteResp deleteResp = client.delete(deleteReq); - log.info("Milvus成功删除 docId={} 的所有向量数据,删除条数: {}", docId, deleteResp.getDeleteCnt()); - } catch (Exception e) { - log.error("Milvus删除失败: {}", e.getMessage()); - throw new ServiceException(e.getMessage()); + R deleteResponse = milvusClient.delete(deleteParam); + if (deleteResponse.getStatus() != R.Status.Success.getCode()) { + log.error("Milvus删除失败: {}", deleteResponse.getMessage()); + throw new ServiceException("Milvus删除失败"); + } else { + log.info("Milvus成功删除 docId={} 的所有向量数据,删除条数: {}", docId, deleteResponse.getData().getDeleteCnt()); } } @Override - public void removeByFid(String fid, String kid) throws ServiceException { - String collectionName = configService.getConfigValue("milvus", "collectionname") + kid; + public void removeByFid(String fid, String kid) { + String collectionName = vectorStoreProperties.getMilvus().getCollectionname() + kid; String expr = "fid == \"" + fid + "\""; - DeleteReq deleteReq = DeleteReq.builder() - .collectionName(collectionName) - .filter(expr) + DeleteParam deleteParam = 
DeleteParam.newBuilder() + .withCollectionName(collectionName) + .withExpr(expr) .build(); - try { - DeleteResp deleteResp = client.delete(deleteReq); - log.info("Milvus成功删除 fid={} 的所有向量数据,删除条数: {}", fid, deleteResp.getDeleteCnt()); - } catch (Exception e) { - log.error("Milvus删除失败: {}", e.getMessage()); - throw new ServiceException(e.getMessage()); + R deleteResponse = milvusClient.delete(deleteParam); + if (deleteResponse.getStatus() != R.Status.Success.getCode()) { + log.error("Milvus删除失败: {}", deleteResponse.getMessage()); + throw new ServiceException("Milvus删除失败"); + } else { + log.info("Milvus成功删除 fid={} 的所有向量数据,删除条数: {}", fid, deleteResponse.getData().getDeleteCnt()); } } } \ No newline at end of file diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/strategy/impl/WeaviateVectorStoreStrategy.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/strategy/impl/WeaviateVectorStoreStrategy.java index 9e3d3aec..6275d939 100644 --- a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/strategy/impl/WeaviateVectorStoreStrategy.java +++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/strategy/impl/WeaviateVectorStoreStrategy.java @@ -1,7 +1,7 @@ package org.ruoyi.service.strategy.impl; import cn.hutool.json.JSONObject; -import com.google.protobuf.ServiceException; +import org.ruoyi.common.core.exception.ServiceException; import dev.langchain4j.data.embedding.Embedding; import dev.langchain4j.model.embedding.EmbeddingModel; import io.weaviate.client.Config; @@ -17,7 +17,7 @@ import io.weaviate.client.v1.schema.model.Schema; import io.weaviate.client.v1.schema.model.WeaviateClass; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; -import org.ruoyi.common.core.service.ConfigService; +import org.ruoyi.common.core.config.VectorStoreProperties; import org.ruoyi.domain.bo.QueryVectorBo; import org.ruoyi.domain.bo.StoreEmbeddingBo; import 
org.ruoyi.service.strategy.AbstractVectorStoreStrategy; @@ -35,8 +35,8 @@ public class WeaviateVectorStoreStrategy extends AbstractVectorStoreStrategy { private WeaviateClient client; - public WeaviateVectorStoreStrategy(ConfigService configService) { - super(configService); + public WeaviateVectorStoreStrategy(VectorStoreProperties vectorStoreProperties) { + super(vectorStoreProperties); } @Override @@ -46,9 +46,9 @@ public class WeaviateVectorStoreStrategy extends AbstractVectorStoreStrategy { @Override public void createSchema(String vectorModelName, String kid, String modelName) { - String protocol = configService.getConfigValue("weaviate", "protocol"); - String host = configService.getConfigValue("weaviate", "host"); - String className = configService.getConfigValue("weaviate", "classname") + kid; + String protocol = vectorStoreProperties.getWeaviate().getProtocol(); + String host = vectorStoreProperties.getWeaviate().getHost(); + String className = vectorStoreProperties.getWeaviate().getClassname() + kid; // 创建 Weaviate 客户端 client = new WeaviateClient(new Config(protocol, host)); // 检查类是否存在,如果不存在就创建 schema @@ -128,7 +128,7 @@ public class WeaviateVectorStoreStrategy extends AbstractVectorStoreStrategy { vectorStrings.add(String.valueOf(v)); } String vectorStr = String.join(",", vectorStrings); - String className = configService.getConfigValue("weaviate", "classname"); + String className = vectorStoreProperties.getWeaviate().getClassname(); // 构建 GraphQL 查询 String graphQLQuery = String.format( @@ -176,9 +176,9 @@ public class WeaviateVectorStoreStrategy extends AbstractVectorStoreStrategy { @Override @SneakyThrows public void removeById(String id, String modelName) { - String protocol = configService.getConfigValue("weaviate", "protocol"); - String host = configService.getConfigValue("weaviate", "host"); - String className = configService.getConfigValue("weaviate", "classname"); + String protocol = vectorStoreProperties.getWeaviate().getProtocol(); + String 
host = vectorStoreProperties.getWeaviate().getHost(); + String className = vectorStoreProperties.getWeaviate().getClassname(); String finalClassName = className + id; WeaviateClient client = new WeaviateClient(new Config(protocol, host)); Result result = client.schema().classDeleter().withClassName(finalClassName).run(); @@ -192,7 +192,7 @@ public class WeaviateVectorStoreStrategy extends AbstractVectorStoreStrategy { @Override public void removeByDocId(String docId, String kid) { - String className = configService.getConfigValue("weaviate", "classname") + kid; + String className = vectorStoreProperties.getWeaviate().getClassname() + kid; // 构建 Where 条件 WhereFilter whereFilter = WhereFilter.builder() .path("docId") @@ -212,7 +212,7 @@ public class WeaviateVectorStoreStrategy extends AbstractVectorStoreStrategy { @Override public void removeByFid(String fid, String kid) { - String className = configService.getConfigValue("weaviate", "classname") + kid; + String className = vectorStoreProperties.getWeaviate().getClassname() + kid; // 构建 Where 条件 WhereFilter whereFilter = WhereFilter.builder() .path("fid")