mirror of
https://gitcode.com/ageerle/ruoyi-ai.git
synced 2026-04-16 05:13:41 +00:00
refactor(vector-store): 优化Milvus向量存储策略实现
重构Milvus向量存储策略,引入连接缓存机制减少重复创建连接的开销 将vectorModelName重命名为vectorStoreName以更准确表达用途 移除默认配置值,改为必须显式配置 优化代码结构,减少重复代码
This commit is contained in:
@@ -17,7 +17,7 @@ public class VectorStoreProperties {
|
|||||||
/**
|
/**
|
||||||
* 向量库类型
|
* 向量库类型
|
||||||
*/
|
*/
|
||||||
private String type = "weaviate";
|
private String type;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Weaviate配置
|
* Weaviate配置
|
||||||
@@ -34,17 +34,17 @@ public class VectorStoreProperties {
|
|||||||
/**
|
/**
|
||||||
* 协议
|
* 协议
|
||||||
*/
|
*/
|
||||||
private String protocol = "http";
|
private String protocol;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 主机地址
|
* 主机地址
|
||||||
*/
|
*/
|
||||||
private String host = "localhost:8080";
|
private String host;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 类名
|
* 类名
|
||||||
*/
|
*/
|
||||||
private String classname = "Document";
|
private String classname;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Data
|
@Data
|
||||||
@@ -52,11 +52,11 @@ public class VectorStoreProperties {
|
|||||||
/**
|
/**
|
||||||
* 连接URL
|
* 连接URL
|
||||||
*/
|
*/
|
||||||
private String url = "http://localhost:19530";
|
private String url;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 集合名称
|
* 集合名称
|
||||||
*/
|
*/
|
||||||
private String collectionname = "knowledge_base";
|
private String collectionname;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -32,9 +32,9 @@ public class StoreEmbeddingBo {
|
|||||||
private List<String> fids;
|
private List<String> fids;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 向量库模型名称
|
* 向量库名称
|
||||||
*/
|
*/
|
||||||
private String vectorModelName;
|
private String vectorStoreName;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 向量化模型id
|
* 向量化模型id
|
||||||
|
|||||||
@@ -23,12 +23,10 @@ import org.springframework.stereotype.Component;
|
|||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.stream.IntStream;
|
import java.util.stream.IntStream;
|
||||||
|
// 新增导入
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
|
||||||
/**
|
|
||||||
* Milvus向量库策略实现
|
|
||||||
*
|
|
||||||
* @author Yzm
|
|
||||||
*/
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@Component
|
@Component
|
||||||
public class MilvusVectorStoreStrategy extends AbstractVectorStoreStrategy {
|
public class MilvusVectorStoreStrategy extends AbstractVectorStoreStrategy {
|
||||||
@@ -37,6 +35,27 @@ public class MilvusVectorStoreStrategy extends AbstractVectorStoreStrategy {
|
|||||||
super(vectorStoreProperties);
|
super(vectorStoreProperties);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 缓存不同集合与 autoFlush 配置的 Milvus 连接
|
||||||
|
private final Map<String, EmbeddingStore<TextSegment>> storeCache = new ConcurrentHashMap<>();
|
||||||
|
|
||||||
|
private EmbeddingStore<TextSegment> getMilvusStore(String collectionName, boolean autoFlushOnInsert) {
|
||||||
|
String key = collectionName + "|" + autoFlushOnInsert;
|
||||||
|
return storeCache.computeIfAbsent(key, k ->
|
||||||
|
MilvusEmbeddingStore.builder()
|
||||||
|
.uri(vectorStoreProperties.getMilvus().getUrl())
|
||||||
|
.collectionName(collectionName)
|
||||||
|
.dimension(2048)
|
||||||
|
.indexType(IndexType.IVF_FLAT)
|
||||||
|
.metricType(MetricType.L2)
|
||||||
|
.autoFlushOnInsert(autoFlushOnInsert)
|
||||||
|
.idFieldName("id")
|
||||||
|
.textFieldName("text")
|
||||||
|
.metadataFieldName("metadata")
|
||||||
|
.vectorFieldName("vector")
|
||||||
|
.build()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getVectorStoreType() {
|
public String getVectorStoreType() {
|
||||||
return "milvus";
|
return "milvus";
|
||||||
@@ -44,27 +63,14 @@ public class MilvusVectorStoreStrategy extends AbstractVectorStoreStrategy {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void createSchema(String vectorModelName, String kid) {
|
public void createSchema(String vectorModelName, String kid) {
|
||||||
String url = vectorStoreProperties.getMilvus().getUrl();
|
|
||||||
String collectionName = vectorStoreProperties.getMilvus().getCollectionname() + kid;
|
String collectionName = vectorStoreProperties.getMilvus().getCollectionname() + kid;
|
||||||
MilvusEmbeddingStore store = MilvusEmbeddingStore.builder()
|
// 使用缓存获取连接以确保只初始化一次
|
||||||
.uri(url)
|
EmbeddingStore<TextSegment> store = getMilvusStore(collectionName, true);
|
||||||
.collectionName(collectionName)
|
|
||||||
.dimension(2048)
|
|
||||||
.indexType(IndexType.IVF_FLAT)
|
|
||||||
.metricType(MetricType.L2)
|
|
||||||
.autoFlushOnInsert(true)
|
|
||||||
.idFieldName("id")
|
|
||||||
.textFieldName("text")
|
|
||||||
.metadataFieldName("metadata")
|
|
||||||
.vectorFieldName("vector")
|
|
||||||
.build();
|
|
||||||
log.info("Milvus集合初始化完成: {}", collectionName);
|
log.info("Milvus集合初始化完成: {}", collectionName);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void storeEmbeddings(StoreEmbeddingBo storeEmbeddingBo) {
|
public void storeEmbeddings(StoreEmbeddingBo storeEmbeddingBo) {
|
||||||
createSchema(storeEmbeddingBo.getVectorModelName(), storeEmbeddingBo.getKid());
|
|
||||||
|
|
||||||
EmbeddingModel embeddingModel = getEmbeddingModel(storeEmbeddingBo.getEmbeddingModelName(),
|
EmbeddingModel embeddingModel = getEmbeddingModel(storeEmbeddingBo.getEmbeddingModelName(),
|
||||||
storeEmbeddingBo.getApiKey(), storeEmbeddingBo.getBaseUrl());
|
storeEmbeddingBo.getApiKey(), storeEmbeddingBo.getBaseUrl());
|
||||||
|
|
||||||
@@ -77,57 +83,35 @@ public class MilvusVectorStoreStrategy extends AbstractVectorStoreStrategy {
|
|||||||
log.info("Milvus向量存储条数记录: {}", chunkList.size());
|
log.info("Milvus向量存储条数记录: {}", chunkList.size());
|
||||||
long startTime = System.currentTimeMillis();
|
long startTime = System.currentTimeMillis();
|
||||||
|
|
||||||
EmbeddingStore<TextSegment> embeddingStore = MilvusEmbeddingStore.builder()
|
// 复用连接,写入场景使用 autoFlush=false 以提升批量插入性能
|
||||||
.uri(vectorStoreProperties.getMilvus().getUrl())
|
EmbeddingStore<TextSegment> embeddingStore = getMilvusStore(collectionName, false);
|
||||||
.collectionName(collectionName)
|
|
||||||
.dimension(2048)
|
|
||||||
.indexType(IndexType.IVF_FLAT)
|
|
||||||
.metricType(MetricType.L2)
|
|
||||||
.autoFlushOnInsert(false)
|
|
||||||
.idFieldName("id")
|
|
||||||
.textFieldName("text")
|
|
||||||
.metadataFieldName("metadata")
|
|
||||||
.vectorFieldName("vector")
|
|
||||||
.build();
|
|
||||||
|
|
||||||
IntStream.range(0, chunkList.size()).forEach(i -> {
|
IntStream.range(0, chunkList.size()).forEach(i -> {
|
||||||
String text = chunkList.get(i);
|
String text = chunkList.get(i);
|
||||||
String fid = fidList.get(i);
|
String fid = fidList.get(i);
|
||||||
Embedding embedding = embeddingModel.embed(text).content();
|
Metadata metadata = new Metadata();
|
||||||
Metadata metadata = new Metadata()
|
metadata.put("fid", fid);
|
||||||
.put("fid", fid)
|
metadata.put("kid", kid);
|
||||||
.put("kid", kid)
|
metadata.put("docId", docId);
|
||||||
.put("docId", docId);
|
|
||||||
TextSegment segment = TextSegment.from(text, metadata);
|
|
||||||
embeddingStore.add(embedding, segment);
|
|
||||||
});
|
|
||||||
|
|
||||||
|
TextSegment textSegment = TextSegment.from(text, metadata);
|
||||||
|
Embedding embedding = embeddingModel.embed(text).content();
|
||||||
|
embeddingStore.add(embedding, textSegment);
|
||||||
|
});
|
||||||
long endTime = System.currentTimeMillis();
|
long endTime = System.currentTimeMillis();
|
||||||
log.info("Milvus向量存储完成消耗时间:{}秒", (endTime - startTime) / 1000);
|
log.info("Milvus向量存储完成消耗时间:{}秒", (endTime - startTime) / 1000);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<String> getQueryVector(QueryVectorBo queryVectorBo) {
|
public List<String> getQueryVector(QueryVectorBo queryVectorBo) {
|
||||||
createSchema(queryVectorBo.getVectorModelName(), queryVectorBo.getKid());
|
|
||||||
|
|
||||||
EmbeddingModel embeddingModel = getEmbeddingModel(queryVectorBo.getEmbeddingModelName(),
|
EmbeddingModel embeddingModel = getEmbeddingModel(queryVectorBo.getEmbeddingModelName(),
|
||||||
queryVectorBo.getApiKey(), queryVectorBo.getBaseUrl());
|
queryVectorBo.getApiKey(), queryVectorBo.getBaseUrl());
|
||||||
|
|
||||||
Embedding queryEmbedding = embeddingModel.embed(queryVectorBo.getQuery()).content();
|
Embedding queryEmbedding = embeddingModel.embed(queryVectorBo.getQuery()).content();
|
||||||
String collectionName = vectorStoreProperties.getMilvus().getCollectionname() + queryVectorBo.getKid();
|
String collectionName = vectorStoreProperties.getMilvus().getCollectionname() + queryVectorBo.getKid();
|
||||||
|
|
||||||
EmbeddingStore<TextSegment> embeddingStore = MilvusEmbeddingStore.builder()
|
// 查询复用连接,autoFlush 对查询无影响,此处保持 true
|
||||||
.uri(vectorStoreProperties.getMilvus().getUrl())
|
EmbeddingStore<TextSegment> embeddingStore = getMilvusStore(collectionName, true);
|
||||||
.collectionName(collectionName)
|
|
||||||
.dimension(2048)
|
|
||||||
.indexType(IndexType.IVF_FLAT)
|
|
||||||
.metricType(MetricType.L2)
|
|
||||||
.autoFlushOnInsert(true)
|
|
||||||
.idFieldName("id")
|
|
||||||
.textFieldName("text")
|
|
||||||
.metadataFieldName("metadata")
|
|
||||||
.vectorFieldName("vector")
|
|
||||||
.build();
|
|
||||||
|
|
||||||
List<String> resultList = new ArrayList<>();
|
List<String> resultList = new ArrayList<>();
|
||||||
EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
|
EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
|
||||||
@@ -147,40 +131,15 @@ public class MilvusVectorStoreStrategy extends AbstractVectorStoreStrategy {
|
|||||||
@Override
|
@Override
|
||||||
@SneakyThrows
|
@SneakyThrows
|
||||||
public void removeById(String id, String modelName) {
|
public void removeById(String id, String modelName) {
|
||||||
String url = vectorStoreProperties.getMilvus().getUrl();
|
// 注意:此处原逻辑使用 collectionname + id,保持现状
|
||||||
String collectionName = vectorStoreProperties.getMilvus().getCollectionname() + id;
|
EmbeddingStore<TextSegment> embeddingStore = getMilvusStore(vectorStoreProperties.getMilvus().getCollectionname() + id, false);
|
||||||
MilvusEmbeddingStore store = MilvusEmbeddingStore.builder()
|
embeddingStore.remove(id);
|
||||||
.uri(url)
|
|
||||||
.collectionName(collectionName)
|
|
||||||
.dimension(2048)
|
|
||||||
.indexType(IndexType.IVF_FLAT)
|
|
||||||
.metricType(MetricType.L2)
|
|
||||||
.autoFlushOnInsert(true)
|
|
||||||
.idFieldName("id")
|
|
||||||
.textFieldName("text")
|
|
||||||
.metadataFieldName("metadata")
|
|
||||||
.vectorFieldName("vector")
|
|
||||||
.build();
|
|
||||||
// 修正:MilvusEmbeddingStore 的 dropCollection 需要传入集合名
|
|
||||||
store.dropCollection(collectionName);
|
|
||||||
log.info("Milvus集合删除成功: {}", collectionName);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void removeByDocId(String docId, String kid) {
|
public void removeByDocId(String docId, String kid) {
|
||||||
String collectionName = vectorStoreProperties.getMilvus().getCollectionname() + kid;
|
String collectionName = vectorStoreProperties.getMilvus().getCollectionname() + kid;
|
||||||
EmbeddingStore<TextSegment> embeddingStore = MilvusEmbeddingStore.builder()
|
EmbeddingStore<TextSegment> embeddingStore = getMilvusStore(collectionName, false);
|
||||||
.uri(vectorStoreProperties.getMilvus().getUrl())
|
|
||||||
.collectionName(collectionName)
|
|
||||||
.dimension(2048)
|
|
||||||
.indexType(IndexType.IVF_FLAT)
|
|
||||||
.metricType(MetricType.L2)
|
|
||||||
.autoFlushOnInsert(false)
|
|
||||||
.idFieldName("id")
|
|
||||||
.textFieldName("text")
|
|
||||||
.metadataFieldName("metadata")
|
|
||||||
.vectorFieldName("vector")
|
|
||||||
.build();
|
|
||||||
Filter filter = MetadataFilterBuilder.metadataKey("docId").isEqualTo(docId);
|
Filter filter = MetadataFilterBuilder.metadataKey("docId").isEqualTo(docId);
|
||||||
embeddingStore.removeAll(filter);
|
embeddingStore.removeAll(filter);
|
||||||
log.info("Milvus成功删除 docId={} 的所有向量数据", docId);
|
log.info("Milvus成功删除 docId={} 的所有向量数据", docId);
|
||||||
@@ -189,18 +148,7 @@ public class MilvusVectorStoreStrategy extends AbstractVectorStoreStrategy {
|
|||||||
@Override
|
@Override
|
||||||
public void removeByFid(String fid, String kid) {
|
public void removeByFid(String fid, String kid) {
|
||||||
String collectionName = vectorStoreProperties.getMilvus().getCollectionname() + kid;
|
String collectionName = vectorStoreProperties.getMilvus().getCollectionname() + kid;
|
||||||
EmbeddingStore<TextSegment> embeddingStore = MilvusEmbeddingStore.builder()
|
EmbeddingStore<TextSegment> embeddingStore = getMilvusStore(collectionName, false);
|
||||||
.uri(vectorStoreProperties.getMilvus().getUrl())
|
|
||||||
.collectionName(collectionName)
|
|
||||||
.dimension(2048)
|
|
||||||
.indexType(IndexType.IVF_FLAT)
|
|
||||||
.metricType(MetricType.L2)
|
|
||||||
.autoFlushOnInsert(false)
|
|
||||||
.idFieldName("id")
|
|
||||||
.textFieldName("text")
|
|
||||||
.metadataFieldName("metadata")
|
|
||||||
.vectorFieldName("vector")
|
|
||||||
.build();
|
|
||||||
Filter filter = MetadataFilterBuilder.metadataKey("fid").isEqualTo(fid);
|
Filter filter = MetadataFilterBuilder.metadataKey("fid").isEqualTo(fid);
|
||||||
embeddingStore.removeAll(filter);
|
embeddingStore.removeAll(filter);
|
||||||
log.info("Milvus成功删除 fid={} 的所有向量数据", fid);
|
log.info("Milvus成功删除 fid={} 的所有向量数据", fid);
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ import java.util.*;
|
|||||||
/**
|
/**
|
||||||
* Weaviate向量库策略实现
|
* Weaviate向量库策略实现
|
||||||
*
|
*
|
||||||
* @author ageer
|
* @author Yzm
|
||||||
*/
|
*/
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@Component
|
@Component
|
||||||
@@ -84,7 +84,7 @@ public class WeaviateVectorStoreStrategy extends AbstractVectorStoreStrategy {
|
|||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void storeEmbeddings(StoreEmbeddingBo storeEmbeddingBo) {
|
public void storeEmbeddings(StoreEmbeddingBo storeEmbeddingBo) {
|
||||||
createSchema(storeEmbeddingBo.getVectorModelName(), storeEmbeddingBo.getKid());
|
createSchema(storeEmbeddingBo.getVectorStoreName(), storeEmbeddingBo.getKid());
|
||||||
EmbeddingModel embeddingModel = getEmbeddingModel(storeEmbeddingBo.getEmbeddingModelName(),
|
EmbeddingModel embeddingModel = getEmbeddingModel(storeEmbeddingBo.getEmbeddingModelName(),
|
||||||
storeEmbeddingBo.getApiKey(), storeEmbeddingBo.getBaseUrl());
|
storeEmbeddingBo.getApiKey(), storeEmbeddingBo.getBaseUrl());
|
||||||
List<String> chunkList = storeEmbeddingBo.getChunkList();
|
List<String> chunkList = storeEmbeddingBo.getChunkList();
|
||||||
|
|||||||
@@ -319,8 +319,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
|||||||
storeEmbeddingBo.setDocId(docId);
|
storeEmbeddingBo.setDocId(docId);
|
||||||
storeEmbeddingBo.setFids(fids);
|
storeEmbeddingBo.setFids(fids);
|
||||||
storeEmbeddingBo.setChunkList(chunkList);
|
storeEmbeddingBo.setChunkList(chunkList);
|
||||||
storeEmbeddingBo.setVectorModelName(knowledgeInfoVo.getVectorModelName());
|
storeEmbeddingBo.setVectorStoreName(knowledgeInfoVo.getVectorModelName());
|
||||||
storeEmbeddingBo.setEmbeddingModelId(knowledgeInfoVo.getEmbeddingModelId());
|
|
||||||
storeEmbeddingBo.setEmbeddingModelName(knowledgeInfoVo.getEmbeddingModelName());
|
storeEmbeddingBo.setEmbeddingModelName(knowledgeInfoVo.getEmbeddingModelName());
|
||||||
storeEmbeddingBo.setApiKey(chatModelVo.getApiKey());
|
storeEmbeddingBo.setApiKey(chatModelVo.getApiKey());
|
||||||
storeEmbeddingBo.setBaseUrl(chatModelVo.getApiHost());
|
storeEmbeddingBo.setBaseUrl(chatModelVo.getApiHost());
|
||||||
|
|||||||
Reference in New Issue
Block a user