mirror of
https://gitcode.com/ageerle/ruoyi-ai.git
synced 2026-04-04 07:26:10 +00:00
feat: Weaviate改为langchain4j方式调用
This commit is contained in:
@@ -2,9 +2,13 @@ package org.ruoyi.service;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author ageer
|
||||
* 向量库管理
|
||||
*/
|
||||
public interface VectorStoreService {
|
||||
|
||||
void storeEmbeddings(List<String> chunkList, String kid);
|
||||
void storeEmbeddings(List<String> chunkList, String kid,String docId,List<String> fids);
|
||||
|
||||
void removeByDocId(String kid,String docId);
|
||||
|
||||
|
||||
@@ -1,13 +0,0 @@
|
||||
package org.ruoyi.service;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Text vectorization: converts text chunks into embedding vectors.
 */
public interface VectorizationService {

    /**
     * Vectorizes a batch of text chunks.
     *
     * @param chunkList the text chunks to embed
     * @param kid       knowledge-base id, passed through to the implementation
     * @return the embedding vectors produced for {@code chunkList}
     */
    List<List<Double>> batchVectorization(List<String> chunkList, String kid);

    /**
     * Vectorizes a single text chunk.
     *
     * @param chunk the text to embed
     * @param kid   knowledge-base id, passed through to the implementation
     * @return the embedding vector produced for {@code chunk}
     */
    List<Double> singleVectorization(String chunk, String kid);
}
|
||||
@@ -1,76 +1,64 @@
|
||||
package org.ruoyi.service.impl;
|
||||
|
||||
import cn.hutool.core.util.RandomUtil;
|
||||
import dev.langchain4j.data.embedding.Embedding;
|
||||
import dev.langchain4j.data.segment.TextSegment;
|
||||
import dev.langchain4j.model.embedding.EmbeddingModel;
|
||||
import dev.langchain4j.model.openai.OpenAiEmbeddingModel;
|
||||
import dev.langchain4j.model.output.Response;
|
||||
import dev.langchain4j.store.embedding.EmbeddingMatch;
|
||||
import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
|
||||
import dev.langchain4j.store.embedding.EmbeddingStore;
|
||||
import dev.langchain4j.store.embedding.filter.Filter;
|
||||
import dev.langchain4j.store.embedding.filter.comparison.IsEqualTo;
|
||||
import dev.langchain4j.store.embedding.weaviate.WeaviateEmbeddingStore;
|
||||
import jakarta.annotation.PostConstruct;
|
||||
import jakarta.annotation.Resource;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.ruoyi.common.core.service.ConfigService;
|
||||
import org.ruoyi.service.VectorStoreService;
|
||||
import org.ruoyi.service.IKnowledgeInfoService;
|
||||
import org.springframework.context.annotation.Lazy;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.testcontainers.weaviate.WeaviateContainer;
|
||||
|
||||
import static dev.langchain4j.model.openai.OpenAiEmbeddingModelName.TEXT_EMBEDDING_3_SMALL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* @author ageer
|
||||
* Weaviate 向量库管理
|
||||
*/
|
||||
@Service
|
||||
@Slf4j
|
||||
@RequiredArgsConstructor
|
||||
public class WeaviateVectorStoreImpl implements VectorStoreService {
|
||||
|
||||
private volatile String protocol;
|
||||
private volatile String host;
|
||||
private volatile String className;
|
||||
|
||||
@Lazy
|
||||
@Resource
|
||||
private IKnowledgeInfoService knowledgeInfoService;
|
||||
|
||||
@Lazy
|
||||
@Resource
|
||||
private ConfigService configService;
|
||||
|
||||
private EmbeddingStore<TextSegment> embeddingStore;
|
||||
|
||||
@PostConstruct
|
||||
public void loadConfig() {
|
||||
this.protocol = configService.getConfigValue("weaviate", "protocol");
|
||||
this.host = configService.getConfigValue("weaviate", "host");
|
||||
this.className = configService.getConfigValue("weaviate", "classname");
|
||||
}
|
||||
private EmbeddingStore<TextSegment> embeddingStore;
|
||||
|
||||
private final ConfigService configService;
|
||||
|
||||
@Override
|
||||
public List<String> getQueryVector(String query, String kid) {
|
||||
EmbeddingModel embeddingModel = OpenAiEmbeddingModel.builder()
|
||||
.apiKey(System.getenv("OPENAI_API_KEY"))
|
||||
.baseUrl(System.getenv("OPENAI_BASE_URL"))
|
||||
.modelName("text-embedding-3-small")
|
||||
.apiKey("sk-xxx")
|
||||
.baseUrl("https://api.pandarobot.chat/v1/")
|
||||
.modelName(TEXT_EMBEDDING_3_SMALL)
|
||||
.build();
|
||||
|
||||
Filter simpleFilter = new IsEqualTo("kid", kid);
|
||||
// Filter simpleFilter = new IsEqualTo("kid", kid);
|
||||
|
||||
Embedding queryEmbedding = embeddingModel.embed("What is your favourite sport?").content();
|
||||
// createSchema(kid);
|
||||
|
||||
Embedding queryEmbedding = embeddingModel.embed("聊天补全模型").content();
|
||||
EmbeddingSearchRequest embeddingSearchRequest = EmbeddingSearchRequest.builder()
|
||||
.queryEmbedding(queryEmbedding)
|
||||
.maxResults(3)
|
||||
.maxResults(2)
|
||||
// 添加过滤条件
|
||||
.filter(simpleFilter)
|
||||
// .filter(simpleFilter)
|
||||
.build();
|
||||
List<EmbeddingMatch<TextSegment>> matches = embeddingStore.search(embeddingSearchRequest).matches();
|
||||
|
||||
|
||||
|
||||
List<String> results = new ArrayList<>();
|
||||
|
||||
matches.forEach(embeddingMatch -> {
|
||||
@@ -82,10 +70,11 @@ public class WeaviateVectorStoreImpl implements VectorStoreService {
|
||||
|
||||
@Override
|
||||
public void createSchema(String kid) {
|
||||
WeaviateContainer weaviate = new WeaviateContainer(protocol);
|
||||
weaviate.start();
|
||||
String protocol = configService.getConfigValue("weaviate", "protocol");
|
||||
String host = configService.getConfigValue("weaviate", "host");
|
||||
String className = configService.getConfigValue("weaviate", "classname");
|
||||
this.embeddingStore = WeaviateEmbeddingStore.builder()
|
||||
.scheme("http")
|
||||
.scheme(protocol)
|
||||
.host(host)
|
||||
.objectClass(className+kid)
|
||||
.scheme(protocol)
|
||||
@@ -95,25 +84,23 @@ public class WeaviateVectorStoreImpl implements VectorStoreService {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void storeEmbeddings(List<String> chunkList,String kid) {
|
||||
public void storeEmbeddings(List<String> chunkList,String kid,String docId,List<String> fids) {
|
||||
EmbeddingModel embeddingModel = OpenAiEmbeddingModel.builder()
|
||||
.apiKey(System.getenv("OPENAI_API_KEY"))
|
||||
.baseUrl(System.getenv("OPENAI_BASE_URL"))
|
||||
.modelName("text-embedding-3-small")
|
||||
.apiKey("sk-xxxx")
|
||||
.baseUrl("https://api.pandarobot.chat/v1/")
|
||||
.modelName(TEXT_EMBEDDING_3_SMALL)
|
||||
.build();
|
||||
// 生成文档id
|
||||
String docId = RandomUtil.randomString(10);
|
||||
|
||||
chunkList.forEach(chunk -> {
|
||||
// 生成知识块id
|
||||
String fid = RandomUtil.randomString(10);
|
||||
Map<String, Object> dataSchema = new HashMap<>();
|
||||
dataSchema.put("kid", kid);
|
||||
dataSchema.put("docId", docId);
|
||||
dataSchema.put("fid", fid);
|
||||
dataSchema.put("fid", fids.get(0));
|
||||
Response<Embedding> response = embeddingModel.embed(chunk);
|
||||
Embedding embedding = response.content();
|
||||
TextSegment segment = TextSegment.from(chunk);
|
||||
segment.metadata().putAll(dataSchema);
|
||||
Embedding content = embeddingModel.embed(segment).content();
|
||||
embeddingStore.add(content);
|
||||
embeddingStore.add(embedding,segment);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -1,49 +0,0 @@
|
||||
package org.ruoyi.chat.factory;
|
||||
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import jakarta.annotation.Resource;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
|
||||
import org.ruoyi.chat.service.knowledge.BgeLargeVectorizationImpl;
|
||||
import org.ruoyi.chat.service.knowledge.OpenAiVectorizationImpl;
|
||||
import org.ruoyi.domain.vo.KnowledgeInfoVo;
|
||||
import org.ruoyi.service.IKnowledgeInfoService;
|
||||
import org.ruoyi.service.VectorizationService;
|
||||
import org.springframework.context.annotation.Lazy;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
/**
|
||||
* 文本向量化
|
||||
* @author huangkh
|
||||
*/
|
||||
@Component
|
||||
@Slf4j
|
||||
public class VectorizationFactory {
|
||||
|
||||
private final OpenAiVectorizationImpl openAiVectorization;
|
||||
|
||||
private final BgeLargeVectorizationImpl bgeLargeVectorization;
|
||||
|
||||
@Lazy
|
||||
@Resource
|
||||
private IKnowledgeInfoService knowledgeInfoService;
|
||||
|
||||
public VectorizationFactory(OpenAiVectorizationImpl openAiVectorization, BgeLargeVectorizationImpl bgeLargeVectorization) {
|
||||
this.openAiVectorization = openAiVectorization;
|
||||
this.bgeLargeVectorization = bgeLargeVectorization;
|
||||
}
|
||||
|
||||
public VectorizationService getEmbedding(String kid){
|
||||
String vectorModel = "text-embedding-3-small";
|
||||
if (StrUtil.isNotEmpty(kid)) {
|
||||
KnowledgeInfoVo knowledgeInfoVo = knowledgeInfoService.queryById(Long.valueOf(kid));
|
||||
if (knowledgeInfoVo != null && StrUtil.isNotEmpty(knowledgeInfoVo.getVectorModel())) {
|
||||
vectorModel = knowledgeInfoVo.getVectorModel();
|
||||
}
|
||||
}
|
||||
return switch (vectorModel) {
|
||||
case "quentinz/bge-large-zh-v1.5" -> bgeLargeVectorization;
|
||||
default -> openAiVectorization;
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -56,8 +56,6 @@ public class SseServiceImpl implements ISseService {
|
||||
|
||||
private final VectorStoreService vectorStoreService;
|
||||
|
||||
private final VectorStoreService vectorStore;
|
||||
|
||||
private final IChatCostService chatCostService;
|
||||
|
||||
private final IChatModelService chatModelService;
|
||||
|
||||
@@ -1,66 +0,0 @@
|
||||
package org.ruoyi.chat.service.knowledge;
|
||||
|
||||
import io.github.ollama4j.OllamaAPI;
|
||||
import io.github.ollama4j.models.embeddings.OllamaEmbeddingsRequestModel;
|
||||
import jakarta.annotation.Resource;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.ruoyi.common.core.exception.ServiceException;
|
||||
import org.ruoyi.domain.vo.ChatModelVo;
|
||||
import org.ruoyi.domain.vo.KnowledgeInfoVo;
|
||||
import org.ruoyi.service.IChatModelService;
|
||||
import org.ruoyi.service.IKnowledgeInfoService;
|
||||
import org.ruoyi.service.VectorizationService;
|
||||
import org.springframework.context.annotation.Lazy;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* @author ageer
|
||||
*/
|
||||
@Component
|
||||
@Slf4j
|
||||
@RequiredArgsConstructor
|
||||
public class BgeLargeVectorizationImpl implements VectorizationService {
|
||||
|
||||
@Lazy
|
||||
@Resource
|
||||
private IKnowledgeInfoService knowledgeInfoService;
|
||||
|
||||
@Lazy
|
||||
@Resource
|
||||
private final IChatModelService chatModelService;
|
||||
|
||||
@Override
|
||||
public List<List<Double>> batchVectorization(List<String> chunkList, String kid) {
|
||||
|
||||
KnowledgeInfoVo knowledgeInfoVo = knowledgeInfoService.queryById(Long.valueOf(kid));
|
||||
|
||||
ChatModelVo chatModelVo = chatModelService.selectModelByName(knowledgeInfoVo.getVectorModel());
|
||||
|
||||
OllamaAPI api = new OllamaAPI(chatModelVo.getApiHost());
|
||||
|
||||
List<Double> doubleVector;
|
||||
List<List<Double>> vectorList = new ArrayList<>();
|
||||
try {
|
||||
for (String chunk : chunkList) {
|
||||
doubleVector = api.generateEmbeddings(new OllamaEmbeddingsRequestModel(knowledgeInfoVo.getVectorModel(), chunk));
|
||||
vectorList.add(doubleVector);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new ServiceException("文本向量化异常:"+e.getMessage());
|
||||
}
|
||||
return vectorList;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Double> singleVectorization(String chunk, String kid) {
|
||||
List<String> chunkList = new ArrayList<>();
|
||||
chunkList.add(chunk);
|
||||
List<List<Double>> vectorList = batchVectorization(chunkList, kid);
|
||||
return vectorList.get(0);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -25,6 +25,8 @@ import org.ruoyi.mapper.KnowledgeFragmentMapper;
|
||||
import org.ruoyi.mapper.KnowledgeInfoMapper;
|
||||
import org.ruoyi.service.VectorStoreService;
|
||||
import org.ruoyi.service.IKnowledgeInfoService;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
@@ -42,6 +44,7 @@ import java.util.*;
|
||||
@Service
|
||||
public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(KnowledgeInfoServiceImpl.class);
|
||||
private final KnowledgeInfoMapper baseMapper;
|
||||
|
||||
private final VectorStoreService vectorStoreService;
|
||||
@@ -211,12 +214,12 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
}
|
||||
fragmentMapper.insertBatch(knowledgeFragmentList);
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
log.error("保存知识库信息失败!{}", e.getMessage());
|
||||
}
|
||||
knowledgeAttach.setContent(content);
|
||||
knowledgeAttach.setCreateTime(new Date());
|
||||
attachMapper.insert(knowledgeAttach);
|
||||
vectorStoreService.storeEmbeddings(chunkList,kid);
|
||||
vectorStoreService.storeEmbeddings(chunkList,kid,docId,fids);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,107 +0,0 @@
|
||||
package org.ruoyi.chat.service.knowledge;
|
||||
|
||||
import jakarta.annotation.Resource;
|
||||
import lombok.Getter;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.ruoyi.chat.config.ChatConfig;
|
||||
import org.ruoyi.common.chat.entity.embeddings.Embedding;
|
||||
import org.ruoyi.common.chat.entity.embeddings.EmbeddingResponse;
|
||||
import org.ruoyi.common.chat.openai.OpenAiStreamClient;
|
||||
import org.ruoyi.domain.vo.ChatModelVo;
|
||||
import org.ruoyi.domain.vo.KnowledgeInfoVo;
|
||||
import org.ruoyi.service.IChatModelService;
|
||||
import org.ruoyi.service.IKnowledgeInfoService;
|
||||
import org.ruoyi.service.VectorizationService;
|
||||
import org.springframework.context.annotation.Lazy;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Component
|
||||
@Slf4j
|
||||
@RequiredArgsConstructor
|
||||
public class OpenAiVectorizationImpl implements VectorizationService {
|
||||
|
||||
@Lazy
|
||||
@Resource
|
||||
private IKnowledgeInfoService knowledgeInfoService;
|
||||
|
||||
@Lazy
|
||||
@Resource
|
||||
private IChatModelService chatModelService;
|
||||
|
||||
@Getter
|
||||
private OpenAiStreamClient openAiStreamClient;
|
||||
|
||||
private final ChatConfig chatConfig;
|
||||
|
||||
@Override
|
||||
public List<List<Double>> batchVectorization(List<String> chunkList, String kid) {
|
||||
List<List<Double>> vectorList;
|
||||
// 获取知识库信息
|
||||
KnowledgeInfoVo knowledgeInfoVo = knowledgeInfoService.queryById(Long.valueOf(kid));
|
||||
if(knowledgeInfoVo == null){
|
||||
log.warn("知识库不存在:请查检ID {}",kid);
|
||||
vectorList=new ArrayList<>();
|
||||
vectorList.add(new ArrayList<>());
|
||||
return vectorList;
|
||||
}
|
||||
ChatModelVo chatModelVo = chatModelService.selectModelByName(knowledgeInfoVo.getVectorModel());
|
||||
String apiHost= chatModelVo.getApiHost();
|
||||
String apiKey= chatModelVo.getApiKey();
|
||||
openAiStreamClient = ChatConfig.createOpenAiStreamClient(apiHost,apiKey);
|
||||
Embedding embedding = buildEmbedding(chunkList, knowledgeInfoVo);
|
||||
EmbeddingResponse embeddings = openAiStreamClient.embeddings(embedding);
|
||||
// 处理 OpenAI 返回的嵌入数据
|
||||
vectorList = processOpenAiEmbeddings(embeddings);
|
||||
return vectorList;
|
||||
}
|
||||
|
||||
/**
|
||||
* 构建 Embedding 对象
|
||||
*/
|
||||
private Embedding buildEmbedding(List<String> chunkList, KnowledgeInfoVo knowledgeInfoVo) {
|
||||
return Embedding.builder()
|
||||
.input(chunkList)
|
||||
.model(knowledgeInfoVo.getVectorModel())
|
||||
.build();
|
||||
}
|
||||
|
||||
/**
|
||||
* 处理 OpenAI 返回的嵌入数据
|
||||
*/
|
||||
private List<List<Double>> processOpenAiEmbeddings(EmbeddingResponse embeddings) {
|
||||
List<List<Double>> vectorList = new ArrayList<>();
|
||||
|
||||
embeddings.getData().forEach(data -> {
|
||||
List<BigDecimal> vector = data.getEmbedding();
|
||||
List<Double> doubleVector = convertToDoubleList(vector);
|
||||
vectorList.add(doubleVector);
|
||||
});
|
||||
|
||||
return vectorList;
|
||||
}
|
||||
|
||||
/**
|
||||
* 将 BigDecimal 转换为 Double 列表
|
||||
*/
|
||||
private List<Double> convertToDoubleList(List<BigDecimal> vector) {
|
||||
return vector.stream()
|
||||
.map(BigDecimal::doubleValue)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public List<Double> singleVectorization(String chunk, String kid) {
|
||||
List<String> chunkList = new ArrayList<>();
|
||||
chunkList.add(chunk);
|
||||
List<List<Double>> vectorList = batchVectorization(chunkList, kid);
|
||||
return vectorList.get(0);
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,30 +0,0 @@
|
||||
package org.ruoyi.chat.service.knowledge;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.ruoyi.chat.factory.VectorizationFactory;
|
||||
import org.ruoyi.service.VectorizationService;
|
||||
import org.springframework.context.annotation.Primary;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@Component
|
||||
@Slf4j
|
||||
@Primary
|
||||
@AllArgsConstructor
|
||||
public class VectorizationWrapper implements VectorizationService {
|
||||
|
||||
private final VectorizationFactory vectorizationFactory;
|
||||
@Override
|
||||
public List<List<Double>> batchVectorization(List<String> chunkList, String kid) {
|
||||
VectorizationService embedding = vectorizationFactory.getEmbedding(kid);
|
||||
return embedding.batchVectorization(chunkList, kid);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Double> singleVectorization(String chunk, String kid) {
|
||||
VectorizationService embedding = vectorizationFactory.getEmbedding(kid);
|
||||
return embedding.singleVectorization(chunk, kid);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user