refactor(vector-store): 优化Milvus向量存储策略实现

重构Milvus向量存储策略,引入连接缓存机制减少重复创建连接的开销
将vectorModelName重命名为vectorStoreName以更准确表达用途
移除默认配置值,改为必须显式配置
优化代码结构,减少重复代码
This commit is contained in:
Yzm
2025-10-17 17:19:01 +08:00
parent 962c2b693c
commit 9d4a0e0b36
5 changed files with 54 additions and 107 deletions

View File

@@ -17,7 +17,7 @@ public class VectorStoreProperties {
/** /**
* 向量库类型 * 向量库类型
*/ */
private String type = "weaviate"; private String type;
/** /**
* Weaviate配置 * Weaviate配置
@@ -34,17 +34,17 @@ public class VectorStoreProperties {
/** /**
* 协议 * 协议
*/ */
private String protocol = "http"; private String protocol;
/** /**
* 主机地址 * 主机地址
*/ */
private String host = "localhost:8080"; private String host;
/** /**
* 类名 * 类名
*/ */
private String classname = "Document"; private String classname;
} }
@Data @Data
@@ -52,11 +52,11 @@ public class VectorStoreProperties {
/** /**
* 连接URL * 连接URL
*/ */
private String url = "http://localhost:19530"; private String url;
/** /**
* 集合名称 * 集合名称
*/ */
private String collectionname = "knowledge_base"; private String collectionname;
} }
} }

View File

@@ -32,9 +32,9 @@ public class StoreEmbeddingBo {
private List<String> fids; private List<String> fids;
/** /**
* 向量库模型名称 * 向量库名称
*/ */
private String vectorModelName; private String vectorStoreName;
/** /**
* 向量化模型id * 向量化模型id

View File

@@ -23,12 +23,10 @@ import org.springframework.stereotype.Component;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.stream.IntStream; import java.util.stream.IntStream;
// 新增导入
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
/**
* Milvus向量库策略实现
*
* @author Yzm
*/
@Slf4j @Slf4j
@Component @Component
public class MilvusVectorStoreStrategy extends AbstractVectorStoreStrategy { public class MilvusVectorStoreStrategy extends AbstractVectorStoreStrategy {
@@ -37,6 +35,27 @@ public class MilvusVectorStoreStrategy extends AbstractVectorStoreStrategy {
super(vectorStoreProperties); super(vectorStoreProperties);
} }
// 缓存不同集合与 autoFlush 配置的 Milvus 连接
private final Map<String, EmbeddingStore<TextSegment>> storeCache = new ConcurrentHashMap<>();
private EmbeddingStore<TextSegment> getMilvusStore(String collectionName, boolean autoFlushOnInsert) {
String key = collectionName + "|" + autoFlushOnInsert;
return storeCache.computeIfAbsent(key, k ->
MilvusEmbeddingStore.builder()
.uri(vectorStoreProperties.getMilvus().getUrl())
.collectionName(collectionName)
.dimension(2048)
.indexType(IndexType.IVF_FLAT)
.metricType(MetricType.L2)
.autoFlushOnInsert(autoFlushOnInsert)
.idFieldName("id")
.textFieldName("text")
.metadataFieldName("metadata")
.vectorFieldName("vector")
.build()
);
}
@Override @Override
public String getVectorStoreType() { public String getVectorStoreType() {
return "milvus"; return "milvus";
@@ -44,27 +63,14 @@ public class MilvusVectorStoreStrategy extends AbstractVectorStoreStrategy {
@Override @Override
public void createSchema(String vectorModelName, String kid) { public void createSchema(String vectorModelName, String kid) {
String url = vectorStoreProperties.getMilvus().getUrl();
String collectionName = vectorStoreProperties.getMilvus().getCollectionname() + kid; String collectionName = vectorStoreProperties.getMilvus().getCollectionname() + kid;
MilvusEmbeddingStore store = MilvusEmbeddingStore.builder() // 使用缓存获取连接以确保只初始化一次
.uri(url) EmbeddingStore<TextSegment> store = getMilvusStore(collectionName, true);
.collectionName(collectionName)
.dimension(2048)
.indexType(IndexType.IVF_FLAT)
.metricType(MetricType.L2)
.autoFlushOnInsert(true)
.idFieldName("id")
.textFieldName("text")
.metadataFieldName("metadata")
.vectorFieldName("vector")
.build();
log.info("Milvus集合初始化完成: {}", collectionName); log.info("Milvus集合初始化完成: {}", collectionName);
} }
@Override @Override
public void storeEmbeddings(StoreEmbeddingBo storeEmbeddingBo) { public void storeEmbeddings(StoreEmbeddingBo storeEmbeddingBo) {
createSchema(storeEmbeddingBo.getVectorModelName(), storeEmbeddingBo.getKid());
EmbeddingModel embeddingModel = getEmbeddingModel(storeEmbeddingBo.getEmbeddingModelName(), EmbeddingModel embeddingModel = getEmbeddingModel(storeEmbeddingBo.getEmbeddingModelName(),
storeEmbeddingBo.getApiKey(), storeEmbeddingBo.getBaseUrl()); storeEmbeddingBo.getApiKey(), storeEmbeddingBo.getBaseUrl());
@@ -77,57 +83,35 @@ public class MilvusVectorStoreStrategy extends AbstractVectorStoreStrategy {
log.info("Milvus向量存储条数记录: {}", chunkList.size()); log.info("Milvus向量存储条数记录: {}", chunkList.size());
long startTime = System.currentTimeMillis(); long startTime = System.currentTimeMillis();
EmbeddingStore<TextSegment> embeddingStore = MilvusEmbeddingStore.builder() // 复用连接,写入场景使用 autoFlush=false 以提升批量插入性能
.uri(vectorStoreProperties.getMilvus().getUrl()) EmbeddingStore<TextSegment> embeddingStore = getMilvusStore(collectionName, false);
.collectionName(collectionName)
.dimension(2048)
.indexType(IndexType.IVF_FLAT)
.metricType(MetricType.L2)
.autoFlushOnInsert(false)
.idFieldName("id")
.textFieldName("text")
.metadataFieldName("metadata")
.vectorFieldName("vector")
.build();
IntStream.range(0, chunkList.size()).forEach(i -> { IntStream.range(0, chunkList.size()).forEach(i -> {
String text = chunkList.get(i); String text = chunkList.get(i);
String fid = fidList.get(i); String fid = fidList.get(i);
Embedding embedding = embeddingModel.embed(text).content(); Metadata metadata = new Metadata();
Metadata metadata = new Metadata() metadata.put("fid", fid);
.put("fid", fid) metadata.put("kid", kid);
.put("kid", kid) metadata.put("docId", docId);
.put("docId", docId);
TextSegment segment = TextSegment.from(text, metadata);
embeddingStore.add(embedding, segment);
});
TextSegment textSegment = TextSegment.from(text, metadata);
Embedding embedding = embeddingModel.embed(text).content();
embeddingStore.add(embedding, textSegment);
});
long endTime = System.currentTimeMillis(); long endTime = System.currentTimeMillis();
log.info("Milvus向量存储完成消耗时间{}秒", (endTime - startTime) / 1000); log.info("Milvus向量存储完成消耗时间{}秒", (endTime - startTime) / 1000);
} }
@Override @Override
public List<String> getQueryVector(QueryVectorBo queryVectorBo) { public List<String> getQueryVector(QueryVectorBo queryVectorBo) {
createSchema(queryVectorBo.getVectorModelName(), queryVectorBo.getKid());
EmbeddingModel embeddingModel = getEmbeddingModel(queryVectorBo.getEmbeddingModelName(), EmbeddingModel embeddingModel = getEmbeddingModel(queryVectorBo.getEmbeddingModelName(),
queryVectorBo.getApiKey(), queryVectorBo.getBaseUrl()); queryVectorBo.getApiKey(), queryVectorBo.getBaseUrl());
Embedding queryEmbedding = embeddingModel.embed(queryVectorBo.getQuery()).content(); Embedding queryEmbedding = embeddingModel.embed(queryVectorBo.getQuery()).content();
String collectionName = vectorStoreProperties.getMilvus().getCollectionname() + queryVectorBo.getKid(); String collectionName = vectorStoreProperties.getMilvus().getCollectionname() + queryVectorBo.getKid();
EmbeddingStore<TextSegment> embeddingStore = MilvusEmbeddingStore.builder() // 查询复用连接autoFlush 对查询无影响,此处保持 true
.uri(vectorStoreProperties.getMilvus().getUrl()) EmbeddingStore<TextSegment> embeddingStore = getMilvusStore(collectionName, true);
.collectionName(collectionName)
.dimension(2048)
.indexType(IndexType.IVF_FLAT)
.metricType(MetricType.L2)
.autoFlushOnInsert(true)
.idFieldName("id")
.textFieldName("text")
.metadataFieldName("metadata")
.vectorFieldName("vector")
.build();
List<String> resultList = new ArrayList<>(); List<String> resultList = new ArrayList<>();
EmbeddingSearchRequest request = EmbeddingSearchRequest.builder() EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
@@ -147,40 +131,15 @@ public class MilvusVectorStoreStrategy extends AbstractVectorStoreStrategy {
@Override @Override
@SneakyThrows @SneakyThrows
public void removeById(String id, String modelName) { public void removeById(String id, String modelName) {
String url = vectorStoreProperties.getMilvus().getUrl(); // 注意:此处原逻辑使用 collectionname + id保持现状
String collectionName = vectorStoreProperties.getMilvus().getCollectionname() + id; EmbeddingStore<TextSegment> embeddingStore = getMilvusStore(vectorStoreProperties.getMilvus().getCollectionname() + id, false);
MilvusEmbeddingStore store = MilvusEmbeddingStore.builder() embeddingStore.remove(id);
.uri(url)
.collectionName(collectionName)
.dimension(2048)
.indexType(IndexType.IVF_FLAT)
.metricType(MetricType.L2)
.autoFlushOnInsert(true)
.idFieldName("id")
.textFieldName("text")
.metadataFieldName("metadata")
.vectorFieldName("vector")
.build();
// 修正MilvusEmbeddingStore 的 dropCollection 需要传入集合名
store.dropCollection(collectionName);
log.info("Milvus集合删除成功: {}", collectionName);
} }
@Override @Override
public void removeByDocId(String docId, String kid) { public void removeByDocId(String docId, String kid) {
String collectionName = vectorStoreProperties.getMilvus().getCollectionname() + kid; String collectionName = vectorStoreProperties.getMilvus().getCollectionname() + kid;
EmbeddingStore<TextSegment> embeddingStore = MilvusEmbeddingStore.builder() EmbeddingStore<TextSegment> embeddingStore = getMilvusStore(collectionName, false);
.uri(vectorStoreProperties.getMilvus().getUrl())
.collectionName(collectionName)
.dimension(2048)
.indexType(IndexType.IVF_FLAT)
.metricType(MetricType.L2)
.autoFlushOnInsert(false)
.idFieldName("id")
.textFieldName("text")
.metadataFieldName("metadata")
.vectorFieldName("vector")
.build();
Filter filter = MetadataFilterBuilder.metadataKey("docId").isEqualTo(docId); Filter filter = MetadataFilterBuilder.metadataKey("docId").isEqualTo(docId);
embeddingStore.removeAll(filter); embeddingStore.removeAll(filter);
log.info("Milvus成功删除 docId={} 的所有向量数据", docId); log.info("Milvus成功删除 docId={} 的所有向量数据", docId);
@@ -189,18 +148,7 @@ public class MilvusVectorStoreStrategy extends AbstractVectorStoreStrategy {
@Override @Override
public void removeByFid(String fid, String kid) { public void removeByFid(String fid, String kid) {
String collectionName = vectorStoreProperties.getMilvus().getCollectionname() + kid; String collectionName = vectorStoreProperties.getMilvus().getCollectionname() + kid;
EmbeddingStore<TextSegment> embeddingStore = MilvusEmbeddingStore.builder() EmbeddingStore<TextSegment> embeddingStore = getMilvusStore(collectionName, false);
.uri(vectorStoreProperties.getMilvus().getUrl())
.collectionName(collectionName)
.dimension(2048)
.indexType(IndexType.IVF_FLAT)
.metricType(MetricType.L2)
.autoFlushOnInsert(false)
.idFieldName("id")
.textFieldName("text")
.metadataFieldName("metadata")
.vectorFieldName("vector")
.build();
Filter filter = MetadataFilterBuilder.metadataKey("fid").isEqualTo(fid); Filter filter = MetadataFilterBuilder.metadataKey("fid").isEqualTo(fid);
embeddingStore.removeAll(filter); embeddingStore.removeAll(filter);
log.info("Milvus成功删除 fid={} 的所有向量数据", fid); log.info("Milvus成功删除 fid={} 的所有向量数据", fid);

View File

@@ -27,7 +27,7 @@ import java.util.*;
/** /**
* Weaviate向量库策略实现 * Weaviate向量库策略实现
* *
* @author ageer * @author Yzm
*/ */
@Slf4j @Slf4j
@Component @Component
@@ -84,7 +84,7 @@ public class WeaviateVectorStoreStrategy extends AbstractVectorStoreStrategy {
@Override @Override
public void storeEmbeddings(StoreEmbeddingBo storeEmbeddingBo) { public void storeEmbeddings(StoreEmbeddingBo storeEmbeddingBo) {
createSchema(storeEmbeddingBo.getVectorModelName(), storeEmbeddingBo.getKid()); createSchema(storeEmbeddingBo.getVectorStoreName(), storeEmbeddingBo.getKid());
EmbeddingModel embeddingModel = getEmbeddingModel(storeEmbeddingBo.getEmbeddingModelName(), EmbeddingModel embeddingModel = getEmbeddingModel(storeEmbeddingBo.getEmbeddingModelName(),
storeEmbeddingBo.getApiKey(), storeEmbeddingBo.getBaseUrl()); storeEmbeddingBo.getApiKey(), storeEmbeddingBo.getBaseUrl());
List<String> chunkList = storeEmbeddingBo.getChunkList(); List<String> chunkList = storeEmbeddingBo.getChunkList();

View File

@@ -319,8 +319,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
storeEmbeddingBo.setDocId(docId); storeEmbeddingBo.setDocId(docId);
storeEmbeddingBo.setFids(fids); storeEmbeddingBo.setFids(fids);
storeEmbeddingBo.setChunkList(chunkList); storeEmbeddingBo.setChunkList(chunkList);
storeEmbeddingBo.setVectorModelName(knowledgeInfoVo.getVectorModelName()); storeEmbeddingBo.setVectorStoreName(knowledgeInfoVo.getVectorModelName());
storeEmbeddingBo.setEmbeddingModelId(knowledgeInfoVo.getEmbeddingModelId());
storeEmbeddingBo.setEmbeddingModelName(knowledgeInfoVo.getEmbeddingModelName()); storeEmbeddingBo.setEmbeddingModelName(knowledgeInfoVo.getEmbeddingModelName());
storeEmbeddingBo.setApiKey(chatModelVo.getApiKey()); storeEmbeddingBo.setApiKey(chatModelVo.getApiKey());
storeEmbeddingBo.setBaseUrl(chatModelVo.getApiHost()); storeEmbeddingBo.setBaseUrl(chatModelVo.getApiHost());