mirror of
https://gitcode.com/ageerle/ruoyi-ai.git
synced 2026-03-13 20:53:42 +08:00
Merge remote-tracking branch 'upstream/main' into feat_vectorStore
# Conflicts: # ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
package org.ruoyi.domain;
|
||||
|
||||
|
||||
import com.alibaba.excel.annotation.ExcelProperty;
|
||||
import com.baomidou.mybatisplus.annotation.TableId;
|
||||
import com.baomidou.mybatisplus.annotation.TableName;
|
||||
import lombok.Data;
|
||||
@@ -81,6 +82,11 @@ public class ChatModel extends BaseEntity {
|
||||
*/
|
||||
private Integer priority;
|
||||
|
||||
/**
|
||||
* 模型供应商
|
||||
*/
|
||||
private String ProviderName;
|
||||
|
||||
/**
|
||||
* 备注
|
||||
*/
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
package org.ruoyi.domain.bo;
|
||||
|
||||
import com.alibaba.excel.annotation.ExcelProperty;
|
||||
import io.github.linpeilie.annotations.AutoMapper;
|
||||
import jakarta.validation.constraints.NotBlank;
|
||||
import jakarta.validation.constraints.NotNull;
|
||||
@@ -85,6 +86,10 @@ public class ChatModelBo extends BaseEntity {
|
||||
@NotBlank(message = "密钥不能为空", groups = { AddGroup.class, EditGroup.class })
|
||||
private String apiKey;
|
||||
|
||||
/**
|
||||
* 模型供应商
|
||||
*/
|
||||
private String ProviderName;
|
||||
|
||||
/**
|
||||
* 备注
|
||||
|
||||
@@ -95,6 +95,12 @@ public class ChatModelVo implements Serializable {
|
||||
@ExcelProperty(value = "优先级")
|
||||
private Integer priority;
|
||||
|
||||
/**
|
||||
* 模型供应商
|
||||
*/
|
||||
@ExcelProperty(value = "模型供应商")
|
||||
private String ProviderName;
|
||||
|
||||
/**
|
||||
* 备注
|
||||
*/
|
||||
|
||||
@@ -112,6 +112,10 @@
|
||||
<artifactId>dashscope-sdk-java</artifactId>
|
||||
<version>2.19.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.ruoyi</groupId>
|
||||
<artifactId>ruoyi-chat-api</artifactId>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
||||
|
||||
@@ -83,6 +83,11 @@ public class KnowledgeInfo extends BaseEntity {
|
||||
*/
|
||||
private String vectorModelName;
|
||||
|
||||
/**
|
||||
* 向量化模型id
|
||||
*/
|
||||
private Long embeddingModelId;
|
||||
|
||||
/**
|
||||
* 向量化模型名称
|
||||
*/
|
||||
|
||||
@@ -92,7 +92,11 @@ public class KnowledgeInfoBo extends BaseEntity {
|
||||
/**
|
||||
* 向量化模型名称
|
||||
*/
|
||||
@NotBlank(message = "向量模型不能为空", groups = { AddGroup.class, EditGroup.class })
|
||||
private Long embeddingModelId;
|
||||
|
||||
/**
|
||||
* 向量化模型名称
|
||||
*/
|
||||
private String embeddingModelName;
|
||||
|
||||
|
||||
|
||||
@@ -31,7 +31,12 @@ public class QueryVectorBo {
|
||||
private String vectorModelName;
|
||||
|
||||
/**
|
||||
* 向量化模型名称
|
||||
* 向量化模型ID
|
||||
*/
|
||||
private Long embeddingModelId;
|
||||
|
||||
/**
|
||||
* Embedding model name
|
||||
*/
|
||||
private String embeddingModelName;
|
||||
|
||||
|
||||
@@ -36,6 +36,11 @@ public class StoreEmbeddingBo {
|
||||
*/
|
||||
private String vectorModelName;
|
||||
|
||||
/**
|
||||
* 向量化模型id
|
||||
*/
|
||||
private Long embeddingModelId;
|
||||
|
||||
/**
|
||||
* 向量化模型名称
|
||||
*/
|
||||
|
||||
@@ -101,6 +101,11 @@ public class KnowledgeInfoVo implements Serializable {
|
||||
*/
|
||||
private String vectorModelName;
|
||||
|
||||
/**
|
||||
* 向量化模型id
|
||||
*/
|
||||
private Long embeddingModelId;
|
||||
|
||||
/**
|
||||
* 向量化模型名称
|
||||
*/
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
package org.ruoyi.embedding;
|
||||
|
||||
import dev.langchain4j.model.embedding.EmbeddingModel;
|
||||
import org.ruoyi.domain.vo.ChatModelVo;
|
||||
import org.ruoyi.embedding.model.ModalityType;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* BaseEmbedModelService 接口,扩展了 EmbeddingModel 接口
|
||||
* 该接口定义了嵌入模型服务的基本配置和功能方法
|
||||
*/
|
||||
public interface BaseEmbedModelService extends EmbeddingModel {
|
||||
/**
|
||||
* 根据配置信息配置嵌入模型
|
||||
* @param config 包含模型配置信息的 ChatModelVo 对象
|
||||
*/
|
||||
void configure(ChatModelVo config);
|
||||
|
||||
/**
|
||||
* 获取当前嵌入模型支持的所有模态类型
|
||||
* @return 返回支持的模态类型集合
|
||||
*/
|
||||
Set<ModalityType> getSupportedModalities();
|
||||
|
||||
}
|
||||
@@ -0,0 +1,117 @@
|
||||
package org.ruoyi.embedding;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.ruoyi.domain.vo.ChatModelVo;
|
||||
import org.ruoyi.service.IChatModelService;
|
||||
import org.springframework.beans.factory.NoSuchBeanDefinitionException;
|
||||
import org.springframework.context.ApplicationContext;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
/**
|
||||
* 嵌入模型工厂服务类
|
||||
* 负责创建和管理各种嵌入模型实例
|
||||
*/
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@Slf4j
|
||||
public class EmbeddingModelFactory {
|
||||
|
||||
private final ApplicationContext applicationContext;
|
||||
|
||||
private final IChatModelService chatModelService;
|
||||
|
||||
// 模型缓存,使用ConcurrentHashMap保证线程安全
|
||||
private final Map<Long, BaseEmbedModelService> modelCache = new ConcurrentHashMap<>();
|
||||
|
||||
/**
|
||||
* 创建嵌入模型实例
|
||||
* 如果模型已存在于缓存中,则直接返回;否则创建新的实例
|
||||
*
|
||||
* @param embeddingModelId 嵌入模型的唯一标识ID
|
||||
* @return BaseEmbedModelService 嵌入模型服务实例
|
||||
*/
|
||||
public BaseEmbedModelService createModel(Long embeddingModelId) {
|
||||
return modelCache.computeIfAbsent(embeddingModelId, id -> {
|
||||
ChatModelVo modelConfig = chatModelService.queryById(id);
|
||||
if (modelConfig == null) {
|
||||
throw new IllegalArgumentException("未找到模型配置,ID=" + id);
|
||||
}
|
||||
return createModelInstance(modelConfig.getProviderName(), modelConfig);
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* 检查模型是否支持多模态
|
||||
*
|
||||
* @param embeddingModelId 嵌入模型的唯一标识ID
|
||||
* @return boolean 如果模型支持多模态则返回true,否则返回false
|
||||
*/
|
||||
public boolean isMultimodalModel(Long embeddingModelId) {
|
||||
return createModel(embeddingModelId) instanceof MultiModalEmbedModelService;
|
||||
}
|
||||
|
||||
/**
|
||||
* 创建多模态嵌入模型实例
|
||||
*
|
||||
* @param tenantId 租户ID
|
||||
* @return MultiModalEmbedModelService 多模态嵌入模型服务实例
|
||||
* @throws IllegalArgumentException 当模型不支持多模态时抛出
|
||||
*/
|
||||
public MultiModalEmbedModelService createMultimodalModel(Long tenantId) {
|
||||
BaseEmbedModelService model = createModel(tenantId);
|
||||
if (model instanceof MultiModalEmbedModelService) {
|
||||
return (MultiModalEmbedModelService) model;
|
||||
}
|
||||
throw new IllegalArgumentException("该模型不支持多模态");
|
||||
}
|
||||
|
||||
/**
|
||||
* 刷新模型缓存
|
||||
* 根据给定的嵌入模型ID从缓存中移除对应的模型
|
||||
*
|
||||
* @param embeddingModelId 嵌入模型的唯一标识ID
|
||||
*/
|
||||
public void refreshModel(Long embeddingModelId) {
|
||||
// 从模型缓存中移除指定ID的模型
|
||||
modelCache.remove(embeddingModelId);
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取所有支持模型工厂的列表
|
||||
*
|
||||
* @return List<String> 支持的模型工厂名称列表
|
||||
*/
|
||||
public List<String> getSupportedFactories() {
|
||||
return new ArrayList<>(applicationContext.getBeansOfType(BaseEmbedModelService.class)
|
||||
.keySet());
|
||||
}
|
||||
|
||||
/**
|
||||
* 创建具体的模型实例
|
||||
* 根据提供的工厂名称和配置信息创建并配置模型实例
|
||||
*
|
||||
* @param factory 工厂名称,用于标识模型类型
|
||||
* @param config 模型配置信息
|
||||
* @return BaseEmbedModelService 配置好的模型实例
|
||||
* @throws IllegalArgumentException 当无法获取指定的模型实例时抛出
|
||||
*/
|
||||
private BaseEmbedModelService createModelInstance(String factory, ChatModelVo config) {
|
||||
try {
|
||||
// 从Spring上下文中获取模型实例
|
||||
BaseEmbedModelService model = applicationContext.getBean(factory, BaseEmbedModelService.class);
|
||||
// 配置模型参数
|
||||
model.configure(config);
|
||||
log.info("成功创建嵌入模型: factory={}, modelId={}", config.getProviderName(), config.getId());
|
||||
|
||||
return model;
|
||||
} catch (NoSuchBeanDefinitionException e) {
|
||||
throw new IllegalArgumentException("获取不到嵌入模型: " + factory, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,35 @@
|
||||
package org.ruoyi.embedding;
|
||||
|
||||
import dev.langchain4j.data.embedding.Embedding;
|
||||
import dev.langchain4j.model.output.Response;
|
||||
import org.ruoyi.embedding.model.MultiModalInput;
|
||||
|
||||
|
||||
/**
|
||||
* 多模态嵌入模型服务接口,继承自基础嵌入模型服务
|
||||
* 该接口提供了处理图像、视频以及多模态数据并转换为嵌入向量的功能
|
||||
*/
|
||||
public interface MultiModalEmbedModelService extends BaseEmbedModelService {
|
||||
/**
|
||||
* 将图像数据转换为嵌入向量
|
||||
* @param imageDataUrl 图像的地址,必须是公开可访问的URL
|
||||
* @return 包含嵌入向量的响应对象,可能包含状态信息和嵌入结果
|
||||
*/
|
||||
Response<Embedding> embedImage(String imageDataUrl);
|
||||
|
||||
/**
|
||||
* 将视频数据转换为嵌入向量
|
||||
* @param videoDataUrl 视频的地址,必须是公开可访问的URL
|
||||
* @return 包含嵌入向量的响应对象,可能包含状态信息和嵌入结果
|
||||
*/
|
||||
Response<Embedding> embedVideo(String videoDataUrl);
|
||||
|
||||
|
||||
/**
|
||||
* 处理多模态输入并返回嵌入向量的方法
|
||||
*
|
||||
* @param input 包含多种模态信息(如图像、文本等)的输入对象
|
||||
* @return Response<Embedding> 包含嵌入向量的响应对象,Embedding通常表示输入数据的向量表示
|
||||
*/
|
||||
Response<Embedding> embedMultiModal(MultiModalInput input);
|
||||
}
|
||||
@@ -0,0 +1,14 @@
|
||||
package org.ruoyi.embedding.impl;
|
||||
|
||||
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
/**
|
||||
* @Author: Robust_H
|
||||
* @Date: 2025-09-30-下午3:00
|
||||
* @Description: 阿里百炼基础嵌入模型(兼容openai)
|
||||
*/
|
||||
@Component("alibailian")
|
||||
public class AliBaiLianBaseEmbedProvider extends OpenAiEmbeddingProvider{
|
||||
|
||||
}
|
||||
@@ -0,0 +1,281 @@
|
||||
package org.ruoyi.embedding.impl;
|
||||
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import dev.langchain4j.data.embedding.Embedding;
|
||||
import dev.langchain4j.data.segment.TextSegment;
|
||||
import dev.langchain4j.model.output.Response;
|
||||
import dev.langchain4j.model.output.TokenUsage;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import okhttp3.*;
|
||||
import org.ruoyi.domain.vo.ChatModelVo;
|
||||
import org.ruoyi.embedding.MultiModalEmbedModelService;
|
||||
import org.ruoyi.embedding.model.AliyunMultiModalEmbedRequest;
|
||||
import org.ruoyi.embedding.model.AliyunMultiModalEmbedResponse;
|
||||
import org.ruoyi.embedding.model.ModalityType;
|
||||
import org.ruoyi.embedding.model.MultiModalInput;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
* 阿里云百炼多模态嵌入模型服务实现类
|
||||
* 实现了MultiModalEmbedModelService接口,提供文本、图像和视频的嵌入向量生成服务
|
||||
*/
|
||||
@Component("bailianMultiModel")
|
||||
@Slf4j
|
||||
public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelService {
|
||||
private ChatModelVo chatModelVo;
|
||||
|
||||
private final OkHttpClient okHttpClient;
|
||||
|
||||
/**
|
||||
* 构造函数,初始化HTTP客户端
|
||||
* 设置连接超时、读取超时和写入超时时间
|
||||
*/
|
||||
public AliBaiLianMultiEmbeddingProvider() {
|
||||
this.okHttpClient = new OkHttpClient.Builder()
|
||||
.connectTimeout(30, TimeUnit.SECONDS)
|
||||
.readTimeout(60, TimeUnit.SECONDS)
|
||||
.writeTimeout(30, TimeUnit.SECONDS)
|
||||
.build();
|
||||
}
|
||||
|
||||
/**
|
||||
* 图像嵌入向量生成
|
||||
* @param imageDataUrl 图像数据的URL
|
||||
* @return 包含图像嵌入向量的Response对象
|
||||
*/
|
||||
@Override
|
||||
public Response<Embedding> embedImage(String imageDataUrl) {
|
||||
return embedSingleModality("image", imageDataUrl);
|
||||
}
|
||||
|
||||
/**
|
||||
* 视频嵌入向量生成
|
||||
* @param videoDataUrl 视频数据的URL
|
||||
* @return 包含视频嵌入向量的Response对象
|
||||
*/
|
||||
@Override
|
||||
public Response<Embedding> embedVideo(String videoDataUrl) {
|
||||
return embedSingleModality("video", videoDataUrl);
|
||||
}
|
||||
|
||||
/**
|
||||
* 多模态嵌入向量生成
|
||||
* 支持同时处理文本、图像和视频等多种模态的数据
|
||||
* @param input 包含多种模态输入的对象
|
||||
* @return 包含多模态嵌入向量的Response对象
|
||||
*/
|
||||
@Override
|
||||
public Response<Embedding> embedMultiModal(MultiModalInput input) {
|
||||
try {
|
||||
// 构建请求内容
|
||||
List<Map<String, Object>> contents = buildContentMap(input);
|
||||
if (contents.isEmpty()) {
|
||||
throw new IllegalArgumentException("至少提供一种模态的内容");
|
||||
}
|
||||
|
||||
// 构建请求
|
||||
AliyunMultiModalEmbedRequest request = buildRequest(contents, chatModelVo);
|
||||
AliyunMultiModalEmbedResponse resp = executeRequest(request, chatModelVo);
|
||||
|
||||
// 转换为 embeddings
|
||||
Response<List<Embedding>> response = toEmbeddings(resp);
|
||||
List<Embedding> embeddings = response.content();
|
||||
|
||||
if (embeddings.isEmpty()) {
|
||||
log.warn("阿里云混合模态嵌入返回为空");
|
||||
return Response.from(Embedding.from(new float[0]), response.tokenUsage());
|
||||
}
|
||||
|
||||
// 多模态通常取第一个向量作为代表,也可以根据业务场景返回多个
|
||||
return Response.from(embeddings.get(0), response.tokenUsage());
|
||||
|
||||
} catch (Exception e) {
|
||||
log.error("阿里云混合模态嵌入失败", e);
|
||||
throw new IllegalArgumentException("阿里云混合模态嵌入失败", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 配置模型参数
|
||||
* @param config 模型配置信息
|
||||
*/
|
||||
@Override
|
||||
public void configure(ChatModelVo config) {
|
||||
this.chatModelVo = config;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取支持的模态类型
|
||||
* @return 支持的模态类型集合
|
||||
*/
|
||||
@Override
|
||||
public Set<ModalityType> getSupportedModalities() {
|
||||
return Set.of(ModalityType.TEXT, ModalityType.VIDEO, ModalityType.IMAGE);
|
||||
}
|
||||
|
||||
/**
|
||||
* 批量文本嵌入向量生成
|
||||
* @param textSegments 文本段列表
|
||||
* @return 包含所有文本嵌入向量的Response对象
|
||||
*/
|
||||
@Override
|
||||
public Response<List<Embedding>> embedAll(List<TextSegment> textSegments) {
|
||||
if (textSegments.isEmpty()) return Response.from(Collections.emptyList());
|
||||
|
||||
try {
|
||||
List<Map<String, Object>> contents = new ArrayList<>();
|
||||
for (TextSegment segment : textSegments) {
|
||||
contents.add(Map.of("text", segment.text()));
|
||||
}
|
||||
|
||||
AliyunMultiModalEmbedRequest request = buildRequest(contents, chatModelVo);
|
||||
AliyunMultiModalEmbedResponse resp = executeRequest(request, chatModelVo);
|
||||
|
||||
return toEmbeddings(resp);
|
||||
} catch (Exception e) {
|
||||
log.error("阿里云文本嵌入失败", e);
|
||||
throw new IllegalArgumentException("阿里云文本嵌入失败", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 单模态嵌入(图片/视频/单条文本)复用方法
|
||||
* @param key 模态类型(image/video/text)
|
||||
* @param dataUrl 数据URL
|
||||
* @return 包含嵌入向量的Response对象
|
||||
*/
|
||||
|
||||
public Response<Embedding> embedSingleModality(String key, String dataUrl) {
|
||||
try {
|
||||
AliyunMultiModalEmbedRequest request = buildRequest(List.of(Map.of(key, dataUrl)), chatModelVo);
|
||||
AliyunMultiModalEmbedResponse resp = executeRequest(request, chatModelVo);
|
||||
|
||||
Response<List<Embedding>> response = toEmbeddings(resp);
|
||||
List<Embedding> embeddings = response.content();
|
||||
|
||||
if (embeddings.isEmpty()) {
|
||||
log.warn("阿里云 {} 嵌入返回为空", key);
|
||||
return Response.from(Embedding.from(new float[0]), response.tokenUsage());
|
||||
}
|
||||
|
||||
return Response.from(embeddings.get(0), response.tokenUsage());
|
||||
} catch (Exception e) {
|
||||
log.error("阿里云 {} 嵌入失败", key, e);
|
||||
throw new IllegalArgumentException("阿里云 " + key + " 嵌入失败", e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 构建请求对象
|
||||
* @param contents 请求内容列表
|
||||
* @param chatModelVo 模型配置信息
|
||||
* @return 构建好的请求对象
|
||||
*/
|
||||
private AliyunMultiModalEmbedRequest buildRequest(List<Map<String, Object>> contents, ChatModelVo chatModelVo) {
|
||||
if (contents.isEmpty()) throw new IllegalArgumentException("请求内容不能为空");
|
||||
return AliyunMultiModalEmbedRequest.create(chatModelVo.getModelName(), contents);
|
||||
}
|
||||
|
||||
/**
|
||||
* 执行 HTTP 请求并解析响应
|
||||
* @param request 请求对象
|
||||
* @param chatModelVo 模型配置信息
|
||||
* @return API响应对象
|
||||
* @throws IOException IO异常
|
||||
*/
|
||||
private AliyunMultiModalEmbedResponse executeRequest(AliyunMultiModalEmbedRequest request, ChatModelVo chatModelVo) throws IOException {
|
||||
String jsonBody = request.toJson();
|
||||
RequestBody body = RequestBody.create(jsonBody, MediaType.get("application/json"));
|
||||
|
||||
Request httpRequest = new Request.Builder()
|
||||
.url(chatModelVo.getApiHost())
|
||||
.addHeader("Authorization", "Bearer " + chatModelVo.getApiKey())
|
||||
.post(body)
|
||||
.build();
|
||||
|
||||
try (okhttp3.Response response = okHttpClient.newCall(httpRequest).execute()) {
|
||||
if (!response.isSuccessful()) {
|
||||
String err = response.body() != null ? response.body().string() : "无错误信息";
|
||||
throw new IllegalArgumentException("API调用失败: " + response.code() + " - " + err, null);
|
||||
}
|
||||
|
||||
ResponseBody responseBody = response.body();
|
||||
if (responseBody == null) throw new IllegalArgumentException("响应体为空", null);
|
||||
|
||||
return parseEmbeddingsFromResponse(responseBody.string());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 解析嵌入向量列表
|
||||
* @param responseBody API响应的JSON字符串
|
||||
* @return 嵌入向量响应对象
|
||||
* @throws IOException IO异常
|
||||
*/
|
||||
private AliyunMultiModalEmbedResponse parseEmbeddingsFromResponse(String responseBody) throws IOException {
|
||||
ObjectMapper objectMapper1 = new ObjectMapper();
|
||||
return objectMapper1.readValue(responseBody, AliyunMultiModalEmbedResponse.class);
|
||||
}
|
||||
|
||||
/**
|
||||
* 构建 API 请求内容 Map
|
||||
* @param input 多模态输入对象
|
||||
* @return 包含各种模态内容的Map列表
|
||||
*/
|
||||
private List<Map<String, Object>> buildContentMap(MultiModalInput input) {
|
||||
List<Map<String, Object>> contents = new ArrayList<>();
|
||||
|
||||
if (input.getText() != null && !input.getText().isBlank()) {
|
||||
contents.add(Map.of("text", input.getText()));
|
||||
}
|
||||
if (input.getImageUrl() != null && !input.getImageUrl().isBlank()) {
|
||||
contents.add(Map.of("image", input.getImageUrl()));
|
||||
}
|
||||
if (input.getVideoUrl() != null && !input.getVideoUrl().isBlank()) {
|
||||
contents.add(Map.of("video", input.getVideoUrl()));
|
||||
}
|
||||
if (input.getMultiImageUrls() != null && input.getMultiImageUrls().length > 0) {
|
||||
contents.add(Map.of("multi_images", Arrays.asList(input.getMultiImageUrls())));
|
||||
}
|
||||
|
||||
return contents;
|
||||
}
|
||||
|
||||
/**
|
||||
* 将 API 原始响应解析为 LangChain4j 的 Response<Embedding>
|
||||
* @param resp API原始响应对象
|
||||
* @return 包含嵌入向量和token使用情况的Response对象
|
||||
*/
|
||||
private Response<List<Embedding>> toEmbeddings(AliyunMultiModalEmbedResponse resp) {
|
||||
if (resp == null || resp.output() == null || resp.output().embeddings() == null) {
|
||||
return Response.from(Collections.emptyList());
|
||||
}
|
||||
|
||||
// 转换 double -> float
|
||||
List<Embedding> embeddings = resp.output().embeddings().stream()
|
||||
.map(item -> {
|
||||
float[] vector = new float[item.embedding().size()];
|
||||
for (int i = 0; i < item.embedding().size(); i++) {
|
||||
vector[i] = item.embedding().get(i).floatValue();
|
||||
}
|
||||
return Embedding.from(vector);
|
||||
})
|
||||
.toList();
|
||||
|
||||
// 构建 TokenUsage
|
||||
TokenUsage tokenUsage = null;
|
||||
if (resp.usage() != null) {
|
||||
tokenUsage = new TokenUsage(
|
||||
resp.usage().input_tokens(),
|
||||
resp.usage().image_tokens(),
|
||||
resp.usage().input_tokens() +resp.usage().image_tokens()
|
||||
);
|
||||
}
|
||||
|
||||
return Response.from(embeddings, tokenUsage);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,41 @@
|
||||
package org.ruoyi.embedding.impl;
|
||||
|
||||
import dev.langchain4j.data.embedding.Embedding;
|
||||
import dev.langchain4j.data.segment.TextSegment;
|
||||
import dev.langchain4j.model.ollama.OllamaEmbeddingModel;
|
||||
import dev.langchain4j.model.output.Response;
|
||||
import org.ruoyi.domain.vo.ChatModelVo;
|
||||
import org.ruoyi.embedding.BaseEmbedModelService;
|
||||
import org.ruoyi.embedding.model.ModalityType;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* @Author: Robust_H
|
||||
* @Date: 2025-09-30-下午3:00
|
||||
* @Description: Ollama嵌入模型
|
||||
*/
|
||||
@Component("ollama")
|
||||
public class OllamaEmbeddingProvider implements BaseEmbedModelService {
|
||||
private ChatModelVo chatModelVo;
|
||||
|
||||
@Override
|
||||
public void configure(ChatModelVo config) {
|
||||
this.chatModelVo = config;
|
||||
}
|
||||
@Override
|
||||
public Set<ModalityType> getSupportedModalities() {
|
||||
return Set.of(ModalityType.TEXT);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Response<List<Embedding>> embedAll(List<TextSegment> textSegments) {
|
||||
return OllamaEmbeddingModel.builder()
|
||||
.baseUrl(chatModelVo.getApiHost())
|
||||
.modelName(chatModelVo.getModelName())
|
||||
.build()
|
||||
.embedAll(textSegments);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
package org.ruoyi.embedding.impl;
|
||||
|
||||
import dev.langchain4j.data.embedding.Embedding;
|
||||
import dev.langchain4j.data.segment.TextSegment;
|
||||
import dev.langchain4j.model.openai.OpenAiEmbeddingModel;
|
||||
import dev.langchain4j.model.output.Response;
|
||||
import org.ruoyi.domain.vo.ChatModelVo;
|
||||
import org.ruoyi.embedding.BaseEmbedModelService;
|
||||
import org.ruoyi.embedding.model.ModalityType;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* @Author: Robust_H
|
||||
* @Date: 2025-09-30-下午3:59
|
||||
* @Description: OpenAi嵌入模型
|
||||
*/
|
||||
@Component("openai")
|
||||
public class OpenAiEmbeddingProvider implements BaseEmbedModelService {
|
||||
protected ChatModelVo chatModelVo;
|
||||
|
||||
@Override
|
||||
public void configure(ChatModelVo config) {
|
||||
this.chatModelVo = config;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<ModalityType> getSupportedModalities() {
|
||||
return Set.of(ModalityType.TEXT);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Response<List<Embedding>> embedAll(List<TextSegment> textSegments) {
|
||||
return OpenAiEmbeddingModel.builder()
|
||||
.baseUrl(chatModelVo.getApiHost())
|
||||
.apiKey(chatModelVo.getApiKey())
|
||||
.modelName(chatModelVo.getModelName())
|
||||
.build()
|
||||
.embedAll(textSegments);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,18 @@
|
||||
package org.ruoyi.embedding.impl;
|
||||
|
||||
|
||||
import org.ruoyi.embedding.BaseEmbedModelService;
|
||||
import org.ruoyi.embedding.model.ModalityType;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* @Author: Robust_H
|
||||
* @Date: 2025-09-30-下午3:59
|
||||
* @Description: 硅基流动(兼容 OpenAi)
|
||||
*/
|
||||
@Component("siliconflow")
|
||||
public class SiliconFlowEmbeddingProvider extends OpenAiEmbeddingProvider {
|
||||
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
package org.ruoyi.embedding.impl;
|
||||
|
||||
import dev.langchain4j.community.model.zhipu.ZhipuAiEmbeddingModel;
|
||||
import dev.langchain4j.data.embedding.Embedding;
|
||||
import dev.langchain4j.data.segment.TextSegment;
|
||||
import dev.langchain4j.model.output.Response;
|
||||
import org.ruoyi.domain.vo.ChatModelVo;
|
||||
import org.ruoyi.embedding.BaseEmbedModelService;
|
||||
import org.ruoyi.embedding.model.ModalityType;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* @Author: Robust_H
|
||||
* @Date: 2025-09-30-下午4:02
|
||||
* @Description: 智谱AI
|
||||
*/
|
||||
@Component("zhipu")
|
||||
public class ZhiPuAiEmbeddingProvider implements BaseEmbedModelService {
|
||||
private ChatModelVo chatModelVo;
|
||||
|
||||
@Override
|
||||
public void configure(ChatModelVo config) {
|
||||
this.chatModelVo = config;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<ModalityType> getSupportedModalities() {
|
||||
return Set.of();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Response<List<Embedding>> embedAll(List<TextSegment> textSegments) {
|
||||
return ZhipuAiEmbeddingModel.builder()
|
||||
.baseUrl(chatModelVo.getApiHost())
|
||||
.apiKey(chatModelVo.getApiKey())
|
||||
.model(chatModelVo.getModelName())
|
||||
.build()
|
||||
.embedAll(textSegments);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
package org.ruoyi.embedding.model;
|
||||
|
||||
import org.ruoyi.common.json.utils.JsonUtils;
|
||||
import lombok.Data;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* @Author: Robust_H
|
||||
* @Date: 2025-10-1-上午10:00
|
||||
* @Description: 阿里云多模态嵌入请求
|
||||
*/
|
||||
@Data
|
||||
public class AliyunMultiModalEmbedRequest {
|
||||
private String model;
|
||||
private Input input;
|
||||
|
||||
/**
|
||||
* 表示输入数据的记录类(Record)
|
||||
* 该类用于封装一个包含多个映射关系列表的输入数据结构
|
||||
*
|
||||
* @param contents 包含多个Map的列表,每个Map中存储String类型的键和Object类型的值
|
||||
*/
|
||||
public record Input(List<Map<String, Object>> contents) { }
|
||||
|
||||
/**
|
||||
* 创建请求对象
|
||||
*/
|
||||
public static AliyunMultiModalEmbedRequest create(String modelName, List<Map<String, Object>> contents) {
|
||||
AliyunMultiModalEmbedRequest request = new AliyunMultiModalEmbedRequest();
|
||||
request.setModel(modelName);
|
||||
Input input = new Input(contents);
|
||||
request.setInput(input);
|
||||
return request;
|
||||
}
|
||||
|
||||
/**
|
||||
* 转换为JSON字符串
|
||||
*/
|
||||
public String toJson() {
|
||||
return JsonUtils.toJsonString(this);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
package org.ruoyi.embedding.model;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Data model of the Alibaba Cloud multimodal embedding API reply.
 *
 * <p>Component names deliberately use the snake_case names of the JSON fields.
 *
 * @param output     result section holding the embeddings
 * @param request_id unique identifier of the request
 * @param code       error code
 * @param message    error message
 * @param usage      usage statistics
 */
public record AliyunMultiModalEmbedResponse(
        Output output,
        String request_id,
        String code,
        String message,
        Usage usage) {

    /**
     * Usage statistics of one request.
     *
     * @param input_tokens number of input tokens
     * @param image_tokens number of image tokens
     * @param image_count  number of input images
     * @param duration     duration of the input video, in seconds
     */
    public record Usage(int input_tokens, int image_tokens, int image_count, int duration) {
    }

    /**
     * One embedding entry of the reply.
     *
     * @param index     index of the corresponding input entry
     * @param embedding the embedding vector (the original comment describes it as 1024-dimensional)
     * @param type      type of the input (text/image/video/multi_images)
     */
    public record EmbeddingItem(int index, List<Double> embedding, String type) {
    }

    /**
     * Result section of the reply.
     *
     * @param embeddings the returned embedding entries
     */
    public record Output(List<EmbeddingItem> embeddings) {
    }
}
|
||||
@@ -0,0 +1,8 @@
|
||||
package org.ruoyi.embedding.model;
|
||||
|
||||
/**
 * Kinds of input modality an embedding provider can declare support for.
 */
public enum ModalityType {
    /** Text input. */
    TEXT,
    /** Image input. */
    IMAGE,
    /** Audio input. */
    AUDIO,
    /** Video input. */
    VIDEO,
    /** Presumably input mixing several modalities; no usage is visible in this change. */
    MULTI
}
|
||||
@@ -0,0 +1,71 @@
|
||||
package org.ruoyi.embedding.model;
|
||||
|
||||
import cn.hutool.core.util.ArrayUtil;
|
||||
import cn.hutool.core.util.StrUtil;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
|
||||
/**
|
||||
* @Author: Robust_H
|
||||
* @Date: 2025-09-30-下午2:13
|
||||
* @Description: 多模态输入
|
||||
*/
|
||||
@Data
|
||||
@Builder
|
||||
public class MultiModalInput {
|
||||
private String text;
|
||||
private byte[] imageData;
|
||||
private byte[] videoData;
|
||||
private String imageMimeType;
|
||||
private String videoMimeType;
|
||||
private String[] multiImageUrls;
|
||||
private String imageUrl;
|
||||
private String videoUrl;
|
||||
|
||||
/**
|
||||
* 检查是否有文本内容
|
||||
*/
|
||||
public boolean hasText() {
|
||||
return StrUtil.isNotBlank(text);
|
||||
}
|
||||
|
||||
/**
|
||||
* 检查是否有图片内容
|
||||
*/
|
||||
public boolean hasImage() {
|
||||
return ArrayUtil.isNotEmpty(imageData) || StrUtil.isNotBlank(imageUrl);
|
||||
}
|
||||
|
||||
/**
|
||||
* 检查是否有视频内容
|
||||
*/
|
||||
public boolean hasVideo() {
|
||||
return ArrayUtil.isNotEmpty(videoData) || StrUtil.isNotBlank(videoUrl);
|
||||
}
|
||||
|
||||
/**
|
||||
* 检查是否有多图片
|
||||
*/
|
||||
public boolean hasMultiImages() {
|
||||
return ArrayUtil.isNotEmpty(multiImageUrls);
|
||||
}
|
||||
|
||||
/**
|
||||
* 检查是否有任何内容
|
||||
*/
|
||||
public boolean hasAnyContent() {
|
||||
return hasText() || hasImage() || hasVideo() || hasMultiImages();
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取内容的数量
|
||||
*/
|
||||
public int getContentCount() {
|
||||
int count = 0;
|
||||
if (hasText()) count++;
|
||||
if (hasImage()) count++;
|
||||
if (hasVideo()) count++;
|
||||
if (hasMultiImages()) count++;
|
||||
return count;
|
||||
}
|
||||
}
|
||||
@@ -2,6 +2,7 @@ package org.ruoyi.service.impl;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.ruoyi.common.core.service.ConfigService;
|
||||
import org.ruoyi.domain.bo.QueryVectorBo;
|
||||
import org.ruoyi.domain.bo.StoreEmbeddingBo;
|
||||
import org.ruoyi.service.VectorStoreService;
|
||||
@@ -9,15 +10,14 @@ import org.ruoyi.service.strategy.VectorStoreStrategy;
|
||||
import org.ruoyi.service.strategy.VectorStoreStrategyFactory;
|
||||
import org.springframework.context.annotation.Primary;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* 向量库服务实现
|
||||
* 向量库管理
|
||||
*
|
||||
* @author ageer
|
||||
*/
|
||||
@Primary
|
||||
@Service
|
||||
@Slf4j
|
||||
@RequiredArgsConstructor
|
||||
@@ -42,7 +42,7 @@ public class VectorStoreServiceImpl implements VectorStoreService {
|
||||
|
||||
@Override
|
||||
public void storeEmbeddings(StoreEmbeddingBo storeEmbeddingBo) {
|
||||
log.info("存储向量数据: kid={}, docId={}, 数据条数={}",
|
||||
log.info("存储向量数据: kid={}, docId={}, 数据条数={}",
|
||||
storeEmbeddingBo.getKid(), storeEmbeddingBo.getDocId(), storeEmbeddingBo.getChunkList().size());
|
||||
VectorStoreStrategy strategy = getCurrentStrategy();
|
||||
strategy.storeEmbeddings(storeEmbeddingBo);
|
||||
@@ -50,7 +50,7 @@ public class VectorStoreServiceImpl implements VectorStoreService {
|
||||
|
||||
@Override
|
||||
public List<String> getQueryVector(QueryVectorBo queryVectorBo) {
|
||||
log.info("查询向量数据: kid={}, query={}, maxResults={}",
|
||||
log.info("查询向量数据: kid={}, query={}, maxResults={}",
|
||||
queryVectorBo.getKid(), queryVectorBo.getQuery(), queryVectorBo.getMaxResults());
|
||||
VectorStoreStrategy strategy = getCurrentStrategy();
|
||||
return strategy.getQueryVector(queryVectorBo);
|
||||
|
||||
Reference in New Issue
Block a user