mirror of
https://gitcode.com/ageerle/ruoyi-ai.git
synced 2026-04-16 05:13:41 +00:00
feat: 接入langchain4j操作向量库
This commit is contained in:
@@ -16,8 +16,21 @@
|
|||||||
<maven.compiler.source>17</maven.compiler.source>
|
<maven.compiler.source>17</maven.compiler.source>
|
||||||
<maven.compiler.target>17</maven.compiler.target>
|
<maven.compiler.target>17</maven.compiler.target>
|
||||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||||
|
<langchain4j.version>1.0.0-beta4</langchain4j.version>
|
||||||
</properties>
|
</properties>
|
||||||
|
|
||||||
|
<dependencyManagement>
|
||||||
|
<dependencies>
|
||||||
|
<dependency>
|
||||||
|
<groupId>dev.langchain4j</groupId>
|
||||||
|
<artifactId>langchain4j-bom</artifactId>
|
||||||
|
<version>${langchain4j.version}</version>
|
||||||
|
<type>pom</type>
|
||||||
|
<scope>import</scope>
|
||||||
|
</dependency>
|
||||||
|
</dependencies>
|
||||||
|
</dependencyManagement>
|
||||||
|
|
||||||
<dependencies>
|
<dependencies>
|
||||||
|
|
||||||
<!-- pdf解析器 -->
|
<!-- pdf解析器 -->
|
||||||
@@ -47,6 +60,35 @@
|
|||||||
<version>4.0.0</version>
|
<version>4.0.0</version>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>dev.langchain4j</groupId>
|
||||||
|
<artifactId>langchain4j</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>dev.langchain4j</groupId>
|
||||||
|
<artifactId>langchain4j-weaviate</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>dev.langchain4j</groupId>
|
||||||
|
<artifactId>langchain4j-embeddings-all-minilm-l6-v2</artifactId>
|
||||||
|
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>org.testcontainers</groupId>
|
||||||
|
<artifactId>weaviate</artifactId>
|
||||||
|
<version>1.19.6</version>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
|
<dependency>
|
||||||
|
<groupId>dev.langchain4j</groupId>
|
||||||
|
<artifactId>langchain4j-open-ai-spring-boot-starter</artifactId>
|
||||||
|
</dependency>
|
||||||
|
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
</project>
|
</project>
|
||||||
|
|||||||
@@ -1,20 +0,0 @@
|
|||||||
package org.ruoyi.service;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
public interface EmbeddingService {
|
|
||||||
|
|
||||||
void storeEmbeddings(List<String> chunkList, String kid, String docId,List<String> fidList);
|
|
||||||
|
|
||||||
void removeByDocId(String kid,String docId);
|
|
||||||
|
|
||||||
void removeByKid(String kid);
|
|
||||||
|
|
||||||
List<Double> getQueryVector(String query, String kid);
|
|
||||||
|
|
||||||
void createSchema(String kid);
|
|
||||||
|
|
||||||
void removeByKidAndFid(String kid, String fid);
|
|
||||||
|
|
||||||
void saveFragment(String kid, String docId, String fid, String content);
|
|
||||||
}
|
|
||||||
@@ -2,22 +2,18 @@ package org.ruoyi.service;
|
|||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
|
||||||
* 向量存储
|
|
||||||
*/
|
|
||||||
public interface VectorStoreService {
|
public interface VectorStoreService {
|
||||||
|
|
||||||
void storeEmbeddings(List<String> chunkList, List<List<Double>> vectorList, String kid, String docId, List<String> fidList);
|
void storeEmbeddings(List<String> chunkList, String kid);
|
||||||
|
|
||||||
void removeByDocId(String kid, String docId);
|
void removeByDocId(String kid,String docId);
|
||||||
|
|
||||||
void removeByKid(String kid);
|
void removeByKid(String kid);
|
||||||
|
|
||||||
List<String> nearest(List<Double> queryVector, String kid);
|
List<String> getQueryVector(String query, String kid);
|
||||||
|
|
||||||
List<String> nearest(String query, String kid);
|
void createSchema(String kid);
|
||||||
|
|
||||||
void newSchema(String kid);
|
|
||||||
|
|
||||||
void removeByKidAndFid(String kid, String fid);
|
void removeByKidAndFid(String kid, String fid);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,64 +0,0 @@
|
|||||||
package org.ruoyi.service.impl;
|
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
|
||||||
import org.ruoyi.service.EmbeddingService;
|
|
||||||
import org.ruoyi.service.VectorStoreService;
|
|
||||||
import org.ruoyi.service.VectorizationService;
|
|
||||||
import org.springframework.stereotype.Service;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
@Service
|
|
||||||
@AllArgsConstructor
|
|
||||||
public class EmbeddingServiceImpl implements EmbeddingService {
|
|
||||||
|
|
||||||
private final VectorStoreService vectorStore;
|
|
||||||
private final VectorizationService vectorization;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* 保存向量数据库
|
|
||||||
* @param chunkList 文档按行切分的片段
|
|
||||||
* @param kid 知识库ID
|
|
||||||
* @param docId 文档ID
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public void storeEmbeddings(List<String> chunkList, String kid, String docId,List<String> fidList) {
|
|
||||||
List<List<Double>> vectorList = vectorization.batchVectorization(chunkList, kid);
|
|
||||||
vectorStore.storeEmbeddings(chunkList,vectorList,kid,docId,fidList);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void removeByDocId(String kid,String docId) {
|
|
||||||
vectorStore.removeByDocId(kid,docId);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void removeByKid(String kid) {
|
|
||||||
vectorStore.removeByKid(kid);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public List<Double> getQueryVector(String query, String kid) {
|
|
||||||
return vectorization.singleVectorization(query,kid);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void createSchema(String kid) {
|
|
||||||
vectorStore.newSchema(kid);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void removeByKidAndFid(String kid, String fid) {
|
|
||||||
vectorStore.removeByKidAndFid(kid,fid);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void saveFragment(String kid, String docId, String fid, String content) {
|
|
||||||
List<String> chunkList = new ArrayList<>();
|
|
||||||
List<String> fidList = new ArrayList<>();
|
|
||||||
chunkList.add(content);
|
|
||||||
fidList.add(fid);
|
|
||||||
storeEmbeddings(chunkList,kid,docId,fidList);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,37 +1,25 @@
|
|||||||
package org.ruoyi.service.impl;
|
package org.ruoyi.service.impl;
|
||||||
|
|
||||||
import cn.hutool.core.lang.UUID;
|
import cn.hutool.core.util.RandomUtil;
|
||||||
import cn.hutool.json.JSONObject;
|
import dev.langchain4j.data.embedding.Embedding;
|
||||||
import com.google.gson.internal.LinkedTreeMap;
|
import dev.langchain4j.data.segment.TextSegment;
|
||||||
import io.weaviate.client.Config;
|
import dev.langchain4j.model.embedding.EmbeddingModel;
|
||||||
import io.weaviate.client.WeaviateClient;
|
import dev.langchain4j.model.openai.OpenAiEmbeddingModel;
|
||||||
import io.weaviate.client.base.Result;
|
import dev.langchain4j.store.embedding.EmbeddingMatch;
|
||||||
import io.weaviate.client.v1.data.model.WeaviateObject;
|
import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
|
||||||
import io.weaviate.client.v1.data.replication.model.ConsistencyLevel;
|
import dev.langchain4j.store.embedding.EmbeddingStore;
|
||||||
import io.weaviate.client.v1.filters.Operator;
|
import dev.langchain4j.store.embedding.filter.Filter;
|
||||||
import io.weaviate.client.v1.filters.WhereFilter;
|
import dev.langchain4j.store.embedding.filter.comparison.IsEqualTo;
|
||||||
import io.weaviate.client.v1.graphql.model.GraphQLResponse;
|
import dev.langchain4j.store.embedding.weaviate.WeaviateEmbeddingStore;
|
||||||
import io.weaviate.client.v1.graphql.query.argument.NearTextArgument;
|
|
||||||
import io.weaviate.client.v1.graphql.query.argument.NearVectorArgument;
|
|
||||||
import io.weaviate.client.v1.graphql.query.fields.Field;
|
|
||||||
import io.weaviate.client.v1.misc.model.Meta;
|
|
||||||
import io.weaviate.client.v1.misc.model.ReplicationConfig;
|
|
||||||
import io.weaviate.client.v1.misc.model.ShardingConfig;
|
|
||||||
import io.weaviate.client.v1.misc.model.VectorIndexConfig;
|
|
||||||
import io.weaviate.client.v1.schema.model.DataType;
|
|
||||||
import io.weaviate.client.v1.schema.model.Property;
|
|
||||||
import io.weaviate.client.v1.schema.model.Schema;
|
|
||||||
import io.weaviate.client.v1.schema.model.WeaviateClass;
|
|
||||||
import jakarta.annotation.PostConstruct;
|
import jakarta.annotation.PostConstruct;
|
||||||
import jakarta.annotation.Resource;
|
import jakarta.annotation.Resource;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
|
||||||
import org.ruoyi.common.core.service.ConfigService;
|
import org.ruoyi.common.core.service.ConfigService;
|
||||||
import org.ruoyi.domain.vo.KnowledgeInfoVo;
|
|
||||||
import org.ruoyi.service.IKnowledgeInfoService;
|
|
||||||
import org.ruoyi.service.VectorStoreService;
|
import org.ruoyi.service.VectorStoreService;
|
||||||
|
import org.ruoyi.service.IKnowledgeInfoService;
|
||||||
import org.springframework.context.annotation.Lazy;
|
import org.springframework.context.annotation.Lazy;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
import org.testcontainers.weaviate.WeaviateContainer;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
@@ -54,6 +42,8 @@ public class WeaviateVectorStoreImpl implements VectorStoreService {
|
|||||||
@Resource
|
@Resource
|
||||||
private ConfigService configService;
|
private ConfigService configService;
|
||||||
|
|
||||||
|
private EmbeddingStore<TextSegment> embeddingStore;
|
||||||
|
|
||||||
@PostConstruct
|
@PostConstruct
|
||||||
public void loadConfig() {
|
public void loadConfig() {
|
||||||
this.protocol = configService.getConfigValue("weaviate", "protocol");
|
this.protocol = configService.getConfigValue("weaviate", "protocol");
|
||||||
@@ -61,342 +51,94 @@ public class WeaviateVectorStoreImpl implements VectorStoreService {
|
|||||||
this.className = configService.getConfigValue("weaviate", "classname");
|
this.className = configService.getConfigValue("weaviate", "classname");
|
||||||
}
|
}
|
||||||
|
|
||||||
public WeaviateClient getClient() {
|
|
||||||
Config config = new Config(protocol, host);
|
|
||||||
WeaviateClient client = new WeaviateClient(config);
|
|
||||||
return client;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Result<Meta> getMeta() {
|
@Override
|
||||||
WeaviateClient client = getClient();
|
public List<String> getQueryVector(String query, String kid) {
|
||||||
Result<Meta> meta = client.misc().metaGetter().run();
|
EmbeddingModel embeddingModel = OpenAiEmbeddingModel.builder()
|
||||||
if (meta.getError() == null) {
|
.apiKey(System.getenv("OPENAI_API_KEY"))
|
||||||
System.out.printf("meta.hostname: %s\n", meta.getResult().getHostname());
|
.baseUrl(System.getenv("OPENAI_BASE_URL"))
|
||||||
System.out.printf("meta.version: %s\n", meta.getResult().getVersion());
|
.modelName("text-embedding-3-small")
|
||||||
System.out.printf("meta.modules: %s\n", meta.getResult().getModules());
|
|
||||||
} else {
|
|
||||||
System.out.printf("Error: %s\n", meta.getError().getMessages());
|
|
||||||
}
|
|
||||||
return meta;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Result<Schema> getSchemas() {
|
|
||||||
WeaviateClient client = getClient();
|
|
||||||
Result<Schema> result = client.schema().getter().run();
|
|
||||||
if (result.hasErrors()) {
|
|
||||||
System.out.println(result.getError());
|
|
||||||
} else {
|
|
||||||
System.out.println(result.getResult());
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public Result<Boolean> createSchema(String kid) {
|
|
||||||
WeaviateClient client = getClient();
|
|
||||||
|
|
||||||
VectorIndexConfig vectorIndexConfig = VectorIndexConfig.builder()
|
|
||||||
.distance("cosine")
|
|
||||||
.cleanupIntervalSeconds(300)
|
|
||||||
.efConstruction(128)
|
|
||||||
.maxConnections(64)
|
|
||||||
.vectorCacheMaxObjects(500000L)
|
|
||||||
.ef(-1)
|
|
||||||
.skip(false)
|
|
||||||
.dynamicEfFactor(8)
|
|
||||||
.dynamicEfMax(500)
|
|
||||||
.dynamicEfMin(100)
|
|
||||||
.flatSearchCutoff(40000)
|
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
ShardingConfig shardingConfig = ShardingConfig.builder()
|
Filter simpleFilter = new IsEqualTo("kid", kid);
|
||||||
.desiredCount(3)
|
|
||||||
.desiredVirtualCount(128)
|
Embedding queryEmbedding = embeddingModel.embed("What is your favourite sport?").content();
|
||||||
.function("murmur3")
|
EmbeddingSearchRequest embeddingSearchRequest = EmbeddingSearchRequest.builder()
|
||||||
.key("_id")
|
.queryEmbedding(queryEmbedding)
|
||||||
.strategy("hash")
|
.maxResults(3)
|
||||||
.virtualPerPhysical(128)
|
// 添加过滤条件
|
||||||
|
.filter(simpleFilter)
|
||||||
.build();
|
.build();
|
||||||
|
List<EmbeddingMatch<TextSegment>> matches = embeddingStore.search(embeddingSearchRequest).matches();
|
||||||
|
|
||||||
ReplicationConfig replicationConfig = ReplicationConfig.builder()
|
List<String> results = new ArrayList<>();
|
||||||
.factor(1)
|
|
||||||
.build();
|
|
||||||
|
|
||||||
JSONObject classModuleConfigValue = new JSONObject();
|
matches.forEach(embeddingMatch -> {
|
||||||
classModuleConfigValue.put("vectorizeClassName", false);
|
results.add(embeddingMatch.embedded().text());
|
||||||
JSONObject classModuleConfig = new JSONObject();
|
});
|
||||||
classModuleConfig.put("text2vec-transformers", classModuleConfigValue);
|
|
||||||
|
|
||||||
JSONObject propertyModuleConfigValueSkipTrue = new JSONObject();
|
return results;
|
||||||
propertyModuleConfigValueSkipTrue.put("vectorizePropertyName", false);
|
|
||||||
propertyModuleConfigValueSkipTrue.put("skip", true);
|
|
||||||
JSONObject propertyModuleConfigSkipTrue = new JSONObject();
|
|
||||||
propertyModuleConfigSkipTrue.put("text2vec-transformers", propertyModuleConfigValueSkipTrue);
|
|
||||||
|
|
||||||
JSONObject propertyModuleConfigValueSkipFalse = new JSONObject();
|
|
||||||
propertyModuleConfigValueSkipFalse.put("vectorizePropertyName", false);
|
|
||||||
propertyModuleConfigValueSkipFalse.put("skip", false);
|
|
||||||
JSONObject propertyModuleConfigSkipFalse = new JSONObject();
|
|
||||||
propertyModuleConfigSkipFalse.put("text2vec-transformers", propertyModuleConfigValueSkipFalse);
|
|
||||||
|
|
||||||
WeaviateClass clazz = WeaviateClass.builder()
|
|
||||||
.className(className + kid)
|
|
||||||
.description("local knowledge")
|
|
||||||
.vectorIndexType("hnsw")
|
|
||||||
.vectorizer("text2vec-transformers")
|
|
||||||
.shardingConfig(shardingConfig)
|
|
||||||
.vectorIndexConfig(vectorIndexConfig)
|
|
||||||
.replicationConfig(replicationConfig)
|
|
||||||
.moduleConfig(classModuleConfig)
|
|
||||||
.properties(new ArrayList() {
|
|
||||||
{
|
|
||||||
add(Property.builder()
|
|
||||||
.dataType(new ArrayList() {
|
|
||||||
{
|
|
||||||
add(DataType.TEXT);
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.name("content")
|
|
||||||
.description("The content of the local knowledge,for search")
|
|
||||||
.moduleConfig(propertyModuleConfigSkipFalse)
|
|
||||||
.build());
|
|
||||||
add(Property.builder()
|
|
||||||
.dataType(new ArrayList() {
|
|
||||||
{
|
|
||||||
add(DataType.TEXT);
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.name("kid")
|
|
||||||
.description("The knowledge id of the local knowledge,for search")
|
|
||||||
.moduleConfig(propertyModuleConfigSkipTrue)
|
|
||||||
.build());
|
|
||||||
add(Property.builder()
|
|
||||||
.dataType(new ArrayList() {
|
|
||||||
{
|
|
||||||
add(DataType.TEXT);
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.name("docId")
|
|
||||||
.description("The doc id of the local knowledge,for search")
|
|
||||||
.moduleConfig(propertyModuleConfigSkipTrue)
|
|
||||||
.build());
|
|
||||||
add(Property.builder()
|
|
||||||
.dataType(new ArrayList() {
|
|
||||||
{
|
|
||||||
add(DataType.TEXT);
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.name("fid")
|
|
||||||
.description("The fragment id of the local knowledge,for search")
|
|
||||||
.moduleConfig(propertyModuleConfigSkipTrue)
|
|
||||||
.build());
|
|
||||||
add(Property.builder()
|
|
||||||
.dataType(new ArrayList() {
|
|
||||||
{
|
|
||||||
add(DataType.TEXT);
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.name("uuid")
|
|
||||||
.description("The uuid id of the local knowledge fragment(same with id properties),for search")
|
|
||||||
.moduleConfig(propertyModuleConfigSkipTrue)
|
|
||||||
.build());
|
|
||||||
} })
|
|
||||||
.build();
|
|
||||||
|
|
||||||
Result<Boolean> result = client.schema().classCreator().withClass(clazz).run();
|
|
||||||
if (result.hasErrors()) {
|
|
||||||
System.out.println(result.getError());
|
|
||||||
}
|
|
||||||
System.out.println(result.getResult());
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void newSchema(String kid) {
|
public void createSchema(String kid) {
|
||||||
createSchema(kid);
|
WeaviateContainer weaviate = new WeaviateContainer(protocol);
|
||||||
}
|
weaviate.start();
|
||||||
|
this.embeddingStore = WeaviateEmbeddingStore.builder()
|
||||||
@Override
|
.scheme("http")
|
||||||
public void removeByKidAndFid(String kid, String fid) {
|
.host(host)
|
||||||
List<String> resultList = new ArrayList<>();
|
.objectClass(className+kid)
|
||||||
WeaviateClient client = getClient();
|
.scheme(protocol)
|
||||||
Field fieldId = Field.builder().name("uuid").build();
|
.avoidDups(true)
|
||||||
WhereFilter where = WhereFilter.builder()
|
.consistencyLevel("ALL")
|
||||||
.path(new String[]{"fid"})
|
|
||||||
.operator(Operator.Equal)
|
|
||||||
.valueString(fid)
|
|
||||||
.build();
|
.build();
|
||||||
Result<GraphQLResponse> result = client.graphQL().get()
|
|
||||||
.withClassName(className + kid)
|
|
||||||
.withFields(fieldId)
|
|
||||||
.withWhere(where)
|
|
||||||
.run();
|
|
||||||
LinkedTreeMap<String, Object> t = (LinkedTreeMap<String, Object>) result.getResult().getData();
|
|
||||||
LinkedTreeMap<String, ArrayList<LinkedTreeMap>> l = (LinkedTreeMap<String, ArrayList<LinkedTreeMap>>) t.get("Get");
|
|
||||||
ArrayList<LinkedTreeMap> m = l.get(className + kid);
|
|
||||||
for (LinkedTreeMap linkedTreeMap : m) {
|
|
||||||
String uuid = linkedTreeMap.get("uuid").toString();
|
|
||||||
resultList.add(uuid);
|
|
||||||
}
|
|
||||||
for (String uuid : resultList) {
|
|
||||||
Result<Boolean> deleteResult = client.data().deleter()
|
|
||||||
.withID(uuid)
|
|
||||||
.withClassName(className + kid)
|
|
||||||
.withConsistencyLevel(ConsistencyLevel.ALL) // default QUORUM
|
|
||||||
.run();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void storeEmbeddings(List<String> chunkList, List<List<Double>> vectorList, String kid, String docId, List<String> fidList) {
|
public void storeEmbeddings(List<String> chunkList,String kid) {
|
||||||
WeaviateClient client = getClient();
|
EmbeddingModel embeddingModel = OpenAiEmbeddingModel.builder()
|
||||||
|
.apiKey(System.getenv("OPENAI_API_KEY"))
|
||||||
for (int i = 0; i < Math.min(chunkList.size(), vectorList.size()); i++) {
|
.baseUrl(System.getenv("OPENAI_BASE_URL"))
|
||||||
List<Double> vector = vectorList.get(i);
|
.modelName("text-embedding-3-small")
|
||||||
Float[] vf = vector.stream().map(Double::floatValue).toArray(Float[]::new);
|
.build();
|
||||||
|
// 生成文档id
|
||||||
|
String docId = RandomUtil.randomString(10);
|
||||||
|
chunkList.forEach(chunk -> {
|
||||||
|
// 生成知识块id
|
||||||
|
String fid = RandomUtil.randomString(10);
|
||||||
Map<String, Object> dataSchema = new HashMap<>();
|
Map<String, Object> dataSchema = new HashMap<>();
|
||||||
dataSchema.put("content", chunkList.get(i));
|
|
||||||
dataSchema.put("kid", kid);
|
dataSchema.put("kid", kid);
|
||||||
dataSchema.put("docId", docId);
|
dataSchema.put("docId", docId);
|
||||||
dataSchema.put("fid", fidList.get(i));
|
dataSchema.put("fid", fid);
|
||||||
String uuid = UUID.randomUUID().toString();
|
TextSegment segment = TextSegment.from(chunk);
|
||||||
dataSchema.put("uuid", uuid);
|
segment.metadata().putAll(dataSchema);
|
||||||
|
Embedding content = embeddingModel.embed(segment).content();
|
||||||
|
embeddingStore.add(content);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
Result<WeaviateObject> result = client.data().creator()
|
|
||||||
.withClassName(className + kid)
|
|
||||||
.withID(uuid)
|
|
||||||
.withVector(vf)
|
|
||||||
.withProperties(dataSchema)
|
|
||||||
.run();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void removeByKid(String kid) {
|
||||||
|
// 根据条件删除向量数据
|
||||||
|
Filter simpleFilter = new IsEqualTo("kid", kid);
|
||||||
|
embeddingStore.removeAll(simpleFilter);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void removeByDocId(String kid, String docId) {
|
public void removeByDocId(String kid, String docId) {
|
||||||
List<String> resultList = new ArrayList<>();
|
// 根据条件删除向量数据
|
||||||
WeaviateClient client = getClient();
|
Filter simpleFilterByDocId = new IsEqualTo("docId", docId);
|
||||||
Field fieldId = Field.builder().name("uuid").build();
|
embeddingStore.removeAll(simpleFilterByDocId);
|
||||||
WhereFilter where = WhereFilter.builder()
|
|
||||||
.path(new String[]{"docId"})
|
|
||||||
.operator(Operator.Equal)
|
|
||||||
.valueString(docId)
|
|
||||||
.build();
|
|
||||||
Result<GraphQLResponse> result = client.graphQL().get()
|
|
||||||
.withClassName(className + kid)
|
|
||||||
.withFields(fieldId)
|
|
||||||
.withWhere(where)
|
|
||||||
.run();
|
|
||||||
LinkedTreeMap<String, Object> t = (LinkedTreeMap<String, Object>) result.getResult().getData();
|
|
||||||
LinkedTreeMap<String, ArrayList<LinkedTreeMap>> l = (LinkedTreeMap<String, ArrayList<LinkedTreeMap>>) t.get("Get");
|
|
||||||
ArrayList<LinkedTreeMap> m = l.get(className + kid);
|
|
||||||
for (LinkedTreeMap linkedTreeMap : m) {
|
|
||||||
String uuid = linkedTreeMap.get("uuid").toString();
|
|
||||||
resultList.add(uuid);
|
|
||||||
}
|
|
||||||
for (String uuid : resultList) {
|
|
||||||
Result<Boolean> deleteResult = client.data().deleter()
|
|
||||||
.withID(uuid)
|
|
||||||
.withClassName(className + kid)
|
|
||||||
.withConsistencyLevel(ConsistencyLevel.ALL) // default QUORUM
|
|
||||||
.run();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void removeByKid(String kid) {
|
public void removeByKidAndFid(String kid, String fid) {
|
||||||
WeaviateClient client = getClient();
|
// 根据条件删除向量数据
|
||||||
Result<Boolean> result = client.schema().classDeleter().withClassName(className + kid).run();
|
Filter simpleFilterByKid = new IsEqualTo("kid", kid);
|
||||||
if (result.hasErrors()) {
|
Filter simpleFilterFid = new IsEqualTo("fid", fid);
|
||||||
System.out.println("删除schema失败" + result.getError());
|
Filter simpleFilterByAnd = Filter.and(simpleFilterFid, simpleFilterByKid);
|
||||||
} else {
|
embeddingStore.removeAll(simpleFilterByAnd);
|
||||||
System.out.println("删除schema成功" + result.getResult());
|
|
||||||
}
|
|
||||||
log.info("drop schema by kid, result = {}", result);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public List<String> nearest(List<Double> queryVector, String kid) {
|
|
||||||
if (StringUtils.isBlank(kid)) {
|
|
||||||
return new ArrayList<String>();
|
|
||||||
}
|
|
||||||
List<String> resultList = new ArrayList<>();
|
|
||||||
Float[] vf = new Float[queryVector.size()];
|
|
||||||
for (int j = 0; j < queryVector.size(); j++) {
|
|
||||||
Double value = queryVector.get(j);
|
|
||||||
vf[j] = value.floatValue();
|
|
||||||
}
|
|
||||||
WeaviateClient client = getClient();
|
|
||||||
Field contentField = Field.builder().name("content").build();
|
|
||||||
Field _additional = Field.builder()
|
|
||||||
.name("_additional")
|
|
||||||
.fields(new Field[]{
|
|
||||||
Field.builder().name("distance").build()
|
|
||||||
}).build();
|
|
||||||
NearVectorArgument nearVector = NearVectorArgument.builder()
|
|
||||||
.vector(vf)
|
|
||||||
.distance(1.6f) // certainty = 1f - distance /2f
|
|
||||||
.build();
|
|
||||||
KnowledgeInfoVo knowledgeInfoVo = knowledgeInfoService.queryById(Long.valueOf(kid));
|
|
||||||
Result<GraphQLResponse> result = client.graphQL().get()
|
|
||||||
.withClassName(className + kid)
|
|
||||||
.withFields(contentField, _additional)
|
|
||||||
.withNearVector(nearVector)
|
|
||||||
.withLimit(knowledgeInfoVo.getRetrieveLimit())
|
|
||||||
.run();
|
|
||||||
LinkedTreeMap<String, Object> t = (LinkedTreeMap<String, Object>) result.getResult().getData();
|
|
||||||
LinkedTreeMap<String, ArrayList<LinkedTreeMap>> l = (LinkedTreeMap<String, ArrayList<LinkedTreeMap>>) t.get("Get");
|
|
||||||
ArrayList<LinkedTreeMap> m = l.get(className + kid);
|
|
||||||
for (LinkedTreeMap linkedTreeMap : m) {
|
|
||||||
String content = linkedTreeMap.get("content").toString();
|
|
||||||
resultList.add(content);
|
|
||||||
}
|
|
||||||
return resultList;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public List<String> nearest(String query, String kid) {
|
|
||||||
if (StringUtils.isBlank(kid)) {
|
|
||||||
return new ArrayList<String>();
|
|
||||||
}
|
|
||||||
List<String> resultList = new ArrayList<>();
|
|
||||||
WeaviateClient client = getClient();
|
|
||||||
Field contentField = Field.builder().name("content").build();
|
|
||||||
Field _additional = Field.builder()
|
|
||||||
.name("_additional")
|
|
||||||
.fields(new Field[]{
|
|
||||||
Field.builder().name("distance").build()
|
|
||||||
}).build();
|
|
||||||
NearTextArgument nearText = client.graphQL().arguments().nearTextArgBuilder()
|
|
||||||
.concepts(new String[]{query})
|
|
||||||
.distance(1.6f) // certainty = 1f - distance /2f
|
|
||||||
.build();
|
|
||||||
KnowledgeInfoVo knowledgeInfoVo = knowledgeInfoService.queryById(Long.valueOf(kid));
|
|
||||||
Result<GraphQLResponse> result = client.graphQL().get()
|
|
||||||
.withClassName(className + kid)
|
|
||||||
.withFields(contentField, _additional)
|
|
||||||
.withNearText(nearText)
|
|
||||||
.withLimit(knowledgeInfoVo.getRetrieveLimit())
|
|
||||||
.run();
|
|
||||||
LinkedTreeMap<String, Object> t = (LinkedTreeMap<String, Object>) result.getResult().getData();
|
|
||||||
LinkedTreeMap<String, ArrayList<LinkedTreeMap>> l = (LinkedTreeMap<String, ArrayList<LinkedTreeMap>>) t.get("Get");
|
|
||||||
ArrayList<LinkedTreeMap> m = l.get(className + kid);
|
|
||||||
for (LinkedTreeMap linkedTreeMap : m) {
|
|
||||||
String content = linkedTreeMap.get("content").toString();
|
|
||||||
resultList.add(content);
|
|
||||||
}
|
|
||||||
return resultList;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Result<Boolean> deleteSchema(String kid) {
|
|
||||||
WeaviateClient client = getClient();
|
|
||||||
Result<Boolean> result = client.schema().classDeleter().withClassName(className + kid).run();
|
|
||||||
if (result.hasErrors()) {
|
|
||||||
System.out.println(result.getError());
|
|
||||||
} else {
|
|
||||||
System.out.println(result.getResult());
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -24,13 +24,11 @@ import org.ruoyi.common.core.utils.StringUtils;
|
|||||||
import org.ruoyi.common.core.utils.file.FileUtils;
|
import org.ruoyi.common.core.utils.file.FileUtils;
|
||||||
import org.ruoyi.common.core.utils.file.MimeTypeUtils;
|
import org.ruoyi.common.core.utils.file.MimeTypeUtils;
|
||||||
import org.ruoyi.common.redis.utils.RedisUtils;
|
import org.ruoyi.common.redis.utils.RedisUtils;
|
||||||
import org.ruoyi.domain.ChatSession;
|
|
||||||
import org.ruoyi.domain.bo.ChatSessionBo;
|
import org.ruoyi.domain.bo.ChatSessionBo;
|
||||||
import org.ruoyi.domain.vo.ChatModelVo;
|
import org.ruoyi.domain.vo.ChatModelVo;
|
||||||
import org.ruoyi.service.EmbeddingService;
|
import org.ruoyi.service.VectorStoreService;
|
||||||
import org.ruoyi.service.IChatModelService;
|
import org.ruoyi.service.IChatModelService;
|
||||||
import org.ruoyi.service.IChatSessionService;
|
import org.ruoyi.service.IChatSessionService;
|
||||||
import org.ruoyi.service.VectorStoreService;
|
|
||||||
import org.springframework.core.io.InputStreamResource;
|
import org.springframework.core.io.InputStreamResource;
|
||||||
import org.springframework.core.io.Resource;
|
import org.springframework.core.io.Resource;
|
||||||
import org.springframework.http.MediaType;
|
import org.springframework.http.MediaType;
|
||||||
@@ -56,7 +54,7 @@ public class SseServiceImpl implements ISseService {
|
|||||||
|
|
||||||
private final OpenAiStreamClient openAiStreamClient;
|
private final OpenAiStreamClient openAiStreamClient;
|
||||||
|
|
||||||
private final EmbeddingService embeddingService;
|
private final VectorStoreService vectorStoreService;
|
||||||
|
|
||||||
private final VectorStoreService vectorStore;
|
private final VectorStoreService vectorStore;
|
||||||
|
|
||||||
@@ -184,9 +182,7 @@ public class SseServiceImpl implements ISseService {
|
|||||||
if(StringUtils.isNotEmpty(chatRequest.getKid())){
|
if(StringUtils.isNotEmpty(chatRequest.getKid())){
|
||||||
List<Message> knMessages = new ArrayList<>();
|
List<Message> knMessages = new ArrayList<>();
|
||||||
String content = messages.get(messages.size() - 1).getContent().toString();
|
String content = messages.get(messages.size() - 1).getContent().toString();
|
||||||
List<String> nearestList;
|
List<String> nearestList = vectorStoreService.getQueryVector(content, chatRequest.getKid());
|
||||||
List<Double> queryVector = embeddingService.getQueryVector(content, chatRequest.getKid());
|
|
||||||
nearestList = vectorStore.nearest(queryVector, chatRequest.getKid());
|
|
||||||
for (String prompt : nearestList) {
|
for (String prompt : nearestList) {
|
||||||
Message userMessage = Message.builder().content(prompt).role(Message.Role.USER).build();
|
Message userMessage = Message.builder().content(prompt).role(Message.Role.USER).build();
|
||||||
knMessages.add(userMessage);
|
knMessages.add(userMessage);
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ import org.ruoyi.domain.vo.KnowledgeInfoVo;
|
|||||||
import org.ruoyi.mapper.KnowledgeAttachMapper;
|
import org.ruoyi.mapper.KnowledgeAttachMapper;
|
||||||
import org.ruoyi.mapper.KnowledgeFragmentMapper;
|
import org.ruoyi.mapper.KnowledgeFragmentMapper;
|
||||||
import org.ruoyi.mapper.KnowledgeInfoMapper;
|
import org.ruoyi.mapper.KnowledgeInfoMapper;
|
||||||
import org.ruoyi.service.EmbeddingService;
|
import org.ruoyi.service.VectorStoreService;
|
||||||
import org.ruoyi.service.IKnowledgeInfoService;
|
import org.ruoyi.service.IKnowledgeInfoService;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
import org.springframework.transaction.annotation.Transactional;
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
@@ -44,7 +44,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
|||||||
|
|
||||||
private final KnowledgeInfoMapper baseMapper;
|
private final KnowledgeInfoMapper baseMapper;
|
||||||
|
|
||||||
private final EmbeddingService embeddingService;
|
private final VectorStoreService vectorStoreService;
|
||||||
|
|
||||||
private final ResourceLoaderFactory resourceLoaderFactory;
|
private final ResourceLoaderFactory resourceLoaderFactory;
|
||||||
|
|
||||||
@@ -150,7 +150,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
|||||||
knowledgeInfo.setUid(LoginHelper.getLoginUser().getUserId());
|
knowledgeInfo.setUid(LoginHelper.getLoginUser().getUserId());
|
||||||
}
|
}
|
||||||
baseMapper.insert(knowledgeInfo);
|
baseMapper.insert(knowledgeInfo);
|
||||||
embeddingService.createSchema(String.valueOf(knowledgeInfo.getId()));
|
vectorStoreService.createSchema(String.valueOf(knowledgeInfo.getId()));
|
||||||
}else {
|
}else {
|
||||||
baseMapper.updateById(knowledgeInfo);
|
baseMapper.updateById(knowledgeInfo);
|
||||||
}
|
}
|
||||||
@@ -165,7 +165,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
|||||||
check(knowledgeInfoList);
|
check(knowledgeInfoList);
|
||||||
// 删除向量库信息
|
// 删除向量库信息
|
||||||
knowledgeInfoList.forEach(knowledgeInfoVo -> {
|
knowledgeInfoList.forEach(knowledgeInfoVo -> {
|
||||||
embeddingService.removeByKid(String.valueOf(knowledgeInfoVo.getId()));
|
vectorStoreService.removeByKid(String.valueOf(knowledgeInfoVo.getId()));
|
||||||
});
|
});
|
||||||
// 删除附件和知识片段
|
// 删除附件和知识片段
|
||||||
fragmentMapper.deleteByMap(map);
|
fragmentMapper.deleteByMap(map);
|
||||||
@@ -197,7 +197,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
|||||||
List<KnowledgeFragment> knowledgeFragmentList = new ArrayList<>();
|
List<KnowledgeFragment> knowledgeFragmentList = new ArrayList<>();
|
||||||
if (CollUtil.isNotEmpty(chunkList)) {
|
if (CollUtil.isNotEmpty(chunkList)) {
|
||||||
for (int i = 0; i < chunkList.size(); i++) {
|
for (int i = 0; i < chunkList.size(); i++) {
|
||||||
String fid = RandomUtil.randomString(16);
|
String fid = RandomUtil.randomString(10);
|
||||||
fids.add(fid);
|
fids.add(fid);
|
||||||
KnowledgeFragment knowledgeFragment = new KnowledgeFragment();
|
KnowledgeFragment knowledgeFragment = new KnowledgeFragment();
|
||||||
knowledgeFragment.setKid(kid);
|
knowledgeFragment.setKid(kid);
|
||||||
@@ -216,7 +216,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
|||||||
knowledgeAttach.setContent(content);
|
knowledgeAttach.setContent(content);
|
||||||
knowledgeAttach.setCreateTime(new Date());
|
knowledgeAttach.setCreateTime(new Date());
|
||||||
attachMapper.insert(knowledgeAttach);
|
attachMapper.insert(knowledgeAttach);
|
||||||
embeddingService.storeEmbeddings(chunkList,kid,docId,fids);
|
vectorStoreService.storeEmbeddings(chunkList,kid);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user