fix: 1. 修复Milvus向量存储时索引越界 2. 移除无用首页

This commit is contained in:
ageerle
2025-03-25 09:57:53 +08:00
parent e893110ee5
commit 0639a3ef16
184 changed files with 159 additions and 11795 deletions

View File

@@ -239,7 +239,7 @@ public class MilvusVectorStore implements VectorStore {
List<List<Float>> vectorFloatList = new ArrayList<>();
List<String> kidList = new ArrayList<>();
List<String> docIdList = new ArrayList<>();
for (int i = 0; i < chunkList.size(); i++) {
for (int i = 0; i < Math.min(chunkList.size(), vectorList.size()); i++) {
List<Double> vector = vectorList.get(i);
List<Float> vfList = new ArrayList<>();
for (int j = 0; j < vector.size(); j++) {

View File

@@ -40,7 +40,7 @@ import java.util.Map;
@Service
@Slf4j
public class WeaviateVectorStore implements VectorStore{
public class WeaviateVectorStore implements VectorStore {
private volatile String protocol;
private volatile String host;
@@ -56,18 +56,18 @@ public class WeaviateVectorStore implements VectorStore{
/**
 * Loads the Weaviate connection settings from the configuration service.
 *
 * <p>Reads the {@code protocol}, {@code host} and {@code classname} values of the
 * {@code weaviate} configuration group into the corresponding fields. Invoked by the
 * container after dependency injection because of {@code @PostConstruct}.
 */
@PostConstruct
public void loadConfig() {
    // Each setting is looked up exactly once; the protocol lookup used to be repeated.
    this.protocol = configService.getConfigValue("weaviate", "protocol");
    this.host = configService.getConfigValue("weaviate", "host");
    this.className = configService.getConfigValue("weaviate", "classname");
}
public WeaviateClient getClient(){
public WeaviateClient getClient() {
Config config = new Config(protocol, host);
WeaviateClient client = new WeaviateClient(config);
return client;
}
public Result<Meta> getMeta(){
public Result<Meta> getMeta() {
WeaviateClient client = getClient();
Result<Meta> meta = client.misc().metaGetter().run();
if (meta.getError() == null) {
@@ -80,114 +80,135 @@ public class WeaviateVectorStore implements VectorStore{
return meta;
}
public Result<Schema> getSchemas(){
public Result<Schema> getSchemas() {
WeaviateClient client = getClient();
Result<Schema> result = client.schema().getter().run();
if (result.hasErrors()) {
System.out.println(result.getError());
}else {
} else {
System.out.println(result.getResult());
}
return result;
}
public Result<Boolean> createSchema(String kid){
public Result<Boolean> createSchema(String kid) {
WeaviateClient client = getClient();
VectorIndexConfig vectorIndexConfig = VectorIndexConfig.builder()
.distance("cosine")
.cleanupIntervalSeconds(300)
.efConstruction(128)
.maxConnections(64)
.vectorCacheMaxObjects(500000L)
.ef(-1)
.skip(false)
.dynamicEfFactor(8)
.dynamicEfMax(500)
.dynamicEfMin(100)
.flatSearchCutoff(40000)
.build();
.distance("cosine")
.cleanupIntervalSeconds(300)
.efConstruction(128)
.maxConnections(64)
.vectorCacheMaxObjects(500000L)
.ef(-1)
.skip(false)
.dynamicEfFactor(8)
.dynamicEfMax(500)
.dynamicEfMin(100)
.flatSearchCutoff(40000)
.build();
ShardingConfig shardingConfig = ShardingConfig.builder()
.desiredCount(3)
.desiredVirtualCount(128)
.function("murmur3")
.key("_id")
.strategy("hash")
.virtualPerPhysical(128)
.build();
.desiredCount(3)
.desiredVirtualCount(128)
.function("murmur3")
.key("_id")
.strategy("hash")
.virtualPerPhysical(128)
.build();
ReplicationConfig replicationConfig = ReplicationConfig.builder()
.factor(1)
.build();
.factor(1)
.build();
JSONObject classModuleConfigValue = new JSONObject();
classModuleConfigValue.put("vectorizeClassName",false);
classModuleConfigValue.put("vectorizeClassName", false);
JSONObject classModuleConfig = new JSONObject();
classModuleConfig.put("text2vec-transformers",classModuleConfigValue);
classModuleConfig.put("text2vec-transformers", classModuleConfigValue);
JSONObject propertyModuleConfigValueSkipTrue = new JSONObject();
propertyModuleConfigValueSkipTrue.put("vectorizePropertyName",false);
propertyModuleConfigValueSkipTrue.put("skip",true);
propertyModuleConfigValueSkipTrue.put("vectorizePropertyName", false);
propertyModuleConfigValueSkipTrue.put("skip", true);
JSONObject propertyModuleConfigSkipTrue = new JSONObject();
propertyModuleConfigSkipTrue.put("text2vec-transformers",propertyModuleConfigValueSkipTrue);
propertyModuleConfigSkipTrue.put("text2vec-transformers", propertyModuleConfigValueSkipTrue);
JSONObject propertyModuleConfigValueSkipFalse = new JSONObject();
propertyModuleConfigValueSkipFalse.put("vectorizePropertyName",false);
propertyModuleConfigValueSkipFalse.put("skip",false);
propertyModuleConfigValueSkipFalse.put("vectorizePropertyName", false);
propertyModuleConfigValueSkipFalse.put("skip", false);
JSONObject propertyModuleConfigSkipFalse = new JSONObject();
propertyModuleConfigSkipFalse.put("text2vec-transformers",propertyModuleConfigValueSkipFalse);
propertyModuleConfigSkipFalse.put("text2vec-transformers", propertyModuleConfigValueSkipFalse);
WeaviateClass clazz = WeaviateClass.builder()
.className(className + kid)
.description("local knowledge")
.vectorIndexType("hnsw")
.vectorizer("text2vec-transformers")
.shardingConfig(shardingConfig)
.vectorIndexConfig(vectorIndexConfig)
.replicationConfig(replicationConfig)
.moduleConfig(classModuleConfig)
.properties(new ArrayList() {{
add(Property.builder()
.dataType(new ArrayList(){ { add(DataType.TEXT); } })
.name("content")
.description("The content of the local knowledge,for search")
.moduleConfig(propertyModuleConfigSkipFalse)
.build());
add(Property.builder()
.dataType(new ArrayList(){ { add(DataType.TEXT); } })
.name("kid")
.description("The knowledge id of the local knowledge,for search")
.moduleConfig(propertyModuleConfigSkipTrue)
.build());
add(Property.builder()
.dataType(new ArrayList(){ { add(DataType.TEXT); } })
.name("docId")
.description("The doc id of the local knowledge,for search")
.moduleConfig(propertyModuleConfigSkipTrue)
.build());
add(Property.builder()
.dataType(new ArrayList(){ { add(DataType.TEXT); } })
.name("fid")
.description("The fragment id of the local knowledge,for search")
.moduleConfig(propertyModuleConfigSkipTrue)
.build());
add(Property.builder()
.dataType(new ArrayList(){ { add(DataType.TEXT); } })
.name("uuid")
.description("The uuid id of the local knowledge fragment(same with id properties),for search")
.moduleConfig(propertyModuleConfigSkipTrue)
.build());
} })
.build();
.className(className + kid)
.description("local knowledge")
.vectorIndexType("hnsw")
.vectorizer("text2vec-transformers")
.shardingConfig(shardingConfig)
.vectorIndexConfig(vectorIndexConfig)
.replicationConfig(replicationConfig)
.moduleConfig(classModuleConfig)
.properties(new ArrayList() {
{
add(Property.builder()
.dataType(new ArrayList() {
{
add(DataType.TEXT);
}
})
.name("content")
.description("The content of the local knowledge,for search")
.moduleConfig(propertyModuleConfigSkipFalse)
.build());
add(Property.builder()
.dataType(new ArrayList() {
{
add(DataType.TEXT);
}
})
.name("kid")
.description("The knowledge id of the local knowledge,for search")
.moduleConfig(propertyModuleConfigSkipTrue)
.build());
add(Property.builder()
.dataType(new ArrayList() {
{
add(DataType.TEXT);
}
})
.name("docId")
.description("The doc id of the local knowledge,for search")
.moduleConfig(propertyModuleConfigSkipTrue)
.build());
add(Property.builder()
.dataType(new ArrayList() {
{
add(DataType.TEXT);
}
})
.name("fid")
.description("The fragment id of the local knowledge,for search")
.moduleConfig(propertyModuleConfigSkipTrue)
.build());
add(Property.builder()
.dataType(new ArrayList() {
{
add(DataType.TEXT);
}
})
.name("uuid")
.description("The uuid id of the local knowledge fragment(same with id properties),for search")
.moduleConfig(propertyModuleConfigSkipTrue)
.build());
} })
.build();
Result<Boolean> result = client.schema().classCreator().withClass(clazz).run();
if (result.hasErrors()) {
System.out.println(result.getError());
}
System.out.println(result.getResult());
return result;
return result;
}
@Override
@@ -201,7 +222,7 @@ public class WeaviateVectorStore implements VectorStore{
WeaviateClient client = getClient();
Field fieldId = Field.builder().name("uuid").build();
WhereFilter where = WhereFilter.builder()
.path(new String[]{ "fid" })
.path(new String[]{"fid"})
.operator(Operator.Equal)
.valueString(fid)
.build();
@@ -210,10 +231,10 @@ public class WeaviateVectorStore implements VectorStore{
.withFields(fieldId)
.withWhere(where)
.run();
LinkedTreeMap<String,Object> t = (LinkedTreeMap<String, Object>) result.getResult().getData();
LinkedTreeMap<String,ArrayList<LinkedTreeMap>> l = (LinkedTreeMap<String, ArrayList<LinkedTreeMap>>) t.get("Get");
LinkedTreeMap<String, Object> t = (LinkedTreeMap<String, Object>) result.getResult().getData();
LinkedTreeMap<String, ArrayList<LinkedTreeMap>> l = (LinkedTreeMap<String, ArrayList<LinkedTreeMap>>) t.get("Get");
ArrayList<LinkedTreeMap> m = l.get(className + kid);
for (LinkedTreeMap linkedTreeMap : m){
for (LinkedTreeMap linkedTreeMap : m) {
String uuid = linkedTreeMap.get("uuid").toString();
resultList.add(uuid);
}
@@ -227,59 +248,59 @@ public class WeaviateVectorStore implements VectorStore{
}
@Override
public void storeEmbeddings(List<String> chunkList, List<List<Double>> vectorList,String kid, String docId,List<String> fidList) {
public void storeEmbeddings(List<String> chunkList, List<List<Double>> vectorList, String kid, String docId, List<String> fidList) {
WeaviateClient client = getClient();
if (vectorList != null) {
for (int i = 0; i < Math.min(chunkList.size(), vectorList.size()); i++) {
List<Double> vector = vectorList.get(i);
Float[] vf = vector.stream().map(Double::floatValue).toArray(Float[]::new);
Map<String, Object> dataSchema = new HashMap<>();
dataSchema.put("content", chunkList.get(i));
dataSchema.put("kid", kid);
dataSchema.put("docId", docId);
dataSchema.put("fid", fidList.get(i));
String uuid = UUID.randomUUID().toString();
dataSchema.put("uuid", uuid);
for (int i = 0; i < Math.min(chunkList.size(), vectorList.size()); i++) {
List<Double> vector = vectorList.get(i);
Float[] vf = vector.stream().map(Double::floatValue).toArray(Float[]::new);
Result<WeaviateObject> result = client.data().creator()
.withClassName(className + kid)
.withID(uuid)
.withVector(vf)
.withProperties(dataSchema)
.run();
}
Map<String, Object> dataSchema = new HashMap<>();
dataSchema.put("content", chunkList.get(i));
dataSchema.put("kid", kid);
dataSchema.put("docId", docId);
dataSchema.put("fid", fidList.get(i));
String uuid = UUID.randomUUID().toString();
dataSchema.put("uuid", uuid);
Result<WeaviateObject> result = client.data().creator()
.withClassName(className + kid)
.withID(uuid)
.withVector(vf)
.withProperties(dataSchema)
.run();
}
}
@Override
public void removeByDocId(String kid,String docId) {
public void removeByDocId(String kid, String docId) {
List<String> resultList = new ArrayList<>();
WeaviateClient client = getClient();
Field fieldId = Field.builder().name("uuid").build();
WhereFilter where = WhereFilter.builder()
.path(new String[]{ "docId" })
.operator(Operator.Equal)
.valueString(docId)
.build();
.path(new String[]{"docId"})
.operator(Operator.Equal)
.valueString(docId)
.build();
Result<GraphQLResponse> result = client.graphQL().get()
.withClassName(className + kid)
.withFields(fieldId)
.withWhere(where)
.run();
LinkedTreeMap<String,Object> t = (LinkedTreeMap<String, Object>) result.getResult().getData();
LinkedTreeMap<String,ArrayList<LinkedTreeMap>> l = (LinkedTreeMap<String, ArrayList<LinkedTreeMap>>) t.get("Get");
.withClassName(className + kid)
.withFields(fieldId)
.withWhere(where)
.run();
LinkedTreeMap<String, Object> t = (LinkedTreeMap<String, Object>) result.getResult().getData();
LinkedTreeMap<String, ArrayList<LinkedTreeMap>> l = (LinkedTreeMap<String, ArrayList<LinkedTreeMap>>) t.get("Get");
ArrayList<LinkedTreeMap> m = l.get(className + kid);
for (LinkedTreeMap linkedTreeMap : m){
for (LinkedTreeMap linkedTreeMap : m) {
String uuid = linkedTreeMap.get("uuid").toString();
resultList.add(uuid);
}
for (String uuid : resultList) {
Result<Boolean> deleteResult = client.data().deleter()
.withID(uuid)
.withClassName(className + kid)
.withConsistencyLevel(ConsistencyLevel.ALL) // default QUORUM
.run();
.withID(uuid)
.withClassName(className + kid)
.withConsistencyLevel(ConsistencyLevel.ALL) // default QUORUM
.run();
}
}
@@ -289,15 +310,15 @@ public class WeaviateVectorStore implements VectorStore{
Result<Boolean> result = client.schema().classDeleter().withClassName(className + kid).run();
if (result.hasErrors()) {
System.out.println("删除schema失败" + result.getError());
}else {
} else {
System.out.println("删除schema成功" + result.getResult());
}
log.info("drop schema by kid, result = {}",result);
log.info("drop schema by kid, result = {}", result);
}
@Override
public List<String> nearest(List<Double> queryVector,String kid) {
if (StringUtils.isBlank(kid)){
public List<String> nearest(List<Double> queryVector, String kid) {
if (StringUtils.isBlank(kid)) {
return new ArrayList<String>();
}
List<String> resultList = new ArrayList<>();
@@ -320,14 +341,14 @@ public class WeaviateVectorStore implements VectorStore{
KnowledgeInfoVo knowledgeInfoVo = knowledgeInfoService.queryById(Long.valueOf(kid));
Result<GraphQLResponse> result = client.graphQL().get()
.withClassName(className + kid)
.withFields(contentField,_additional)
.withFields(contentField, _additional)
.withNearVector(nearVector)
.withLimit(knowledgeInfoVo.getRetrieveLimit())
.run();
LinkedTreeMap<String,Object> t = (LinkedTreeMap<String, Object>) result.getResult().getData();
LinkedTreeMap<String,ArrayList<LinkedTreeMap>> l = (LinkedTreeMap<String, ArrayList<LinkedTreeMap>>) t.get("Get");
LinkedTreeMap<String, Object> t = (LinkedTreeMap<String, Object>) result.getResult().getData();
LinkedTreeMap<String, ArrayList<LinkedTreeMap>> l = (LinkedTreeMap<String, ArrayList<LinkedTreeMap>>) t.get("Get");
ArrayList<LinkedTreeMap> m = l.get(className + kid);
for (LinkedTreeMap linkedTreeMap : m){
for (LinkedTreeMap linkedTreeMap : m) {
String content = linkedTreeMap.get("content").toString();
resultList.add(content);
}
@@ -335,8 +356,8 @@ public class WeaviateVectorStore implements VectorStore{
}
@Override
public List<String> nearest(String query,String kid) {
if (StringUtils.isBlank(kid)){
public List<String> nearest(String query, String kid) {
if (StringUtils.isBlank(kid)) {
return new ArrayList<String>();
}
List<String> resultList = new ArrayList<>();
@@ -348,20 +369,20 @@ public class WeaviateVectorStore implements VectorStore{
Field.builder().name("distance").build()
}).build();
NearTextArgument nearText = client.graphQL().arguments().nearTextArgBuilder()
.concepts(new String[]{ query })
.concepts(new String[]{query})
.distance(1.6f) // certainty = 1f - distance /2f
.build();
KnowledgeInfoVo knowledgeInfoVo = knowledgeInfoService.queryById(Long.valueOf(kid));
Result<GraphQLResponse> result = client.graphQL().get()
.withClassName(className + kid)
.withFields(contentField,_additional)
.withFields(contentField, _additional)
.withNearText(nearText)
.withLimit(knowledgeInfoVo.getRetrieveLimit())
.run();
LinkedTreeMap<String,Object> t = (LinkedTreeMap<String, Object>) result.getResult().getData();
LinkedTreeMap<String,ArrayList<LinkedTreeMap>> l = (LinkedTreeMap<String, ArrayList<LinkedTreeMap>>) t.get("Get");
LinkedTreeMap<String, Object> t = (LinkedTreeMap<String, Object>) result.getResult().getData();
LinkedTreeMap<String, ArrayList<LinkedTreeMap>> l = (LinkedTreeMap<String, ArrayList<LinkedTreeMap>>) t.get("Get");
ArrayList<LinkedTreeMap> m = l.get(className + kid);
for (LinkedTreeMap linkedTreeMap : m){
for (LinkedTreeMap linkedTreeMap : m) {
String content = linkedTreeMap.get("content").toString();
resultList.add(content);
}
@@ -370,10 +391,10 @@ public class WeaviateVectorStore implements VectorStore{
public Result<Boolean> deleteSchema(String kid) {
WeaviateClient client = getClient();
Result<Boolean> result = client.schema().classDeleter().withClassName(className+ kid).run();
Result<Boolean> result = client.schema().classDeleter().withClassName(className + kid).run();
if (result.hasErrors()) {
System.out.println(result.getError());
}else {
} else {
System.out.println(result.getResult());
}
return result;