v3.0.0 init

This commit is contained in:
ageerle
2026-02-06 03:00:23 +08:00
parent eb2e8f3ff8
commit 7b8cfe02a1
1524 changed files with 53132 additions and 58866 deletions

View File

@@ -1,123 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven descriptor for the ruoyi-knowledge-api module (child of ruoyi-modules-api). -->
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.ruoyi</groupId>
<artifactId>ruoyi-modules-api</artifactId>
<version>${revision}</version>
<relativePath>../pom.xml</relativePath>
</parent>
<artifactId>ruoyi-knowledge-api</artifactId>
<properties>
<maven.compiler.source>17</maven.compiler.source>
<maven.compiler.target>17</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<langchain4j.version>1.0.0-beta4</langchain4j.version>
</properties>
<!-- Imports the LangChain4j BOM so the dev.langchain4j dependencies below need no explicit version. -->
<dependencyManagement>
<dependencies>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-bom</artifactId>
<version>${langchain4j.version}</version>
<type>pom</type>
<scope>import</scope>
</dependency>
</dependencies>
</dependencyManagement>
<dependencies>
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.17.0</version>
</dependency>
<dependency>
<groupId>org.ruoyi</groupId>
<artifactId>ruoyi-system-api</artifactId>
</dependency>
<!-- PDF parser -->
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.27</version>
</dependency>
<!-- Ollama Java SDK -->
<dependency>
<groupId>io.github.ollama4j</groupId>
<artifactId>ollama4j</artifactId>
<version>1.0.79</version>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j</artifactId>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-weaviate</artifactId>
</dependency>
<!-- NOTE(review): Testcontainers is normally a test-time library, yet no <scope>test</scope> is set here; confirm compile scope is intended. -->
<dependency>
<groupId>org.testcontainers</groupId>
<artifactId>weaviate</artifactId>
<version>1.19.6</version>
</dependency>
<dependency>
<groupId>io.milvus</groupId>
<artifactId>milvus-sdk-java</artifactId>
<version>2.6.4</version>
</dependency>
<!-- LangChain4j Milvus Embedding Store -->
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-milvus</artifactId>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-open-ai</artifactId>
</dependency>
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-ollama</artifactId>
</dependency>
<!-- The transitive commons-compress pulled in by the Tika parser is excluded and re-declared right below. -->
<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-document-parser-apache-tika</artifactId>
<exclusions>
<exclusion>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
</exclusion>
</exclusions>
</dependency>
<!-- Explicitly declare a compatible commons-compress version.
     No version is set here, so it is presumably managed by a parent POM; verify. -->
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-compress</artifactId>
</dependency>
<dependency>
<groupId>org.ruoyi</groupId>
<artifactId>ruoyi-chat-api</artifactId>
</dependency>
</dependencies>
</project>

View File

@@ -1,39 +0,0 @@
package org.ruoyi.chain.loader;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.ruoyi.chain.split.TextSplitter;
import org.springframework.stereotype.Component;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.List;
@Component
@AllArgsConstructor
@Slf4j
public class CodeFileLoader implements ResourceLoader {
private final TextSplitter textSplitter;
@Override
public String getContent(InputStream inputStream) {
StringBuffer stringBuffer = new StringBuffer();
try (InputStreamReader reader = new InputStreamReader(inputStream);
BufferedReader bufferedReader = new BufferedReader(reader)) {
String line;
while ((line = bufferedReader.readLine()) != null) {
stringBuffer.append(line).append("\n");
}
} catch (IOException e) {
e.printStackTrace();
}
return stringBuffer.toString();
}
@Override
public List<String> getChunkList(String content, String kid) {
return textSplitter.split(content, kid);
}
}

View File

@@ -1,16 +0,0 @@
package org.ruoyi.chain.loader;
import java.io.InputStream;
import java.util.List;
/**
* Stub {@link ResourceLoader} named for CSV input.
* Neither method is implemented yet: both return null.
*/
public class CsvFileLoader implements ResourceLoader {
/**
* Not implemented.
*
* @param inputStream ignored
* @return always null
*/
@Override
public String getContent(InputStream inputStream) {
return null;
}
/**
* Not implemented.
*
* @param content ignored
* @param kid ignored
* @return always null
*/
@Override
public List<String> getChunkList(String content, String kid) {
return null;
}
}

View File

@@ -1,43 +0,0 @@
package org.ruoyi.chain.loader;
import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.ruoyi.chain.split.TextSplitter;
import org.ruoyi.common.core.exception.UtilException;
import org.springframework.stereotype.Component;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
/**
 * {@link ResourceLoader} for Excel workbooks.
 *
 * <p>Text extraction is delegated to Apache Tika; chunking is delegated to the injected
 * {@link TextSplitter}.
 */
@Component
@AllArgsConstructor
@Slf4j
public class ExcelFileLoader implements ResourceLoader {

    /** Buffer size, in bytes, of the stream wrapper handed to the parser. */
    private static final int DEFAULT_BUFFER_SIZE = 8192;

    private final TextSplitter textSplitter;

    /**
     * Parses the workbook with Apache Tika and returns its text.
     *
     * @param inputStream stream holding the workbook; it is closed together with the buffering
     *                    wrapper when this method returns
     * @return the extracted text
     * @throws UtilException if closing the stream fails or the parser throws a runtime exception
     */
    @Override
    public String getContent(InputStream inputStream) {
        // try-with-resources closes the buffering wrapper, which also closes the wrapped stream.
        try (InputStream buffered = new BufferedInputStream(inputStream, DEFAULT_BUFFER_SIZE)) {
            Document parsed = new ApacheTikaDocumentParser().parse(buffered);
            return parsed.text();
        } catch (IOException ioFailure) {
            throw new UtilException("Excel文件流读取失败", ioFailure);
        } catch (RuntimeException parseFailure) {
            throw new UtilException("Excel内容解析异常", parseFailure);
        }
    }

    /**
     * Splits the content into chunks using the configured splitter.
     *
     * @param content text to split
     * @param kid     knowledge base id, passed through to the splitter
     * @return whatever the splitter returns for this content
     */
    @Override
    public List<String> getChunkList(String content, String kid) {
        return textSplitter.split(content, kid);
    }
}

View File

@@ -1,16 +0,0 @@
package org.ruoyi.chain.loader;
import java.io.InputStream;
import java.util.List;
/**
* Stub {@link ResourceLoader} named for folder input.
* Neither method is implemented yet: both return null.
*/
public class FolderLoader implements ResourceLoader {
/**
* Not implemented.
*
* @param inputStream ignored
* @return always null
*/
@Override
public String getContent(InputStream inputStream) {
return null;
}
/**
* Not implemented.
*
* @param content ignored
* @param kid ignored
* @return always null
*/
@Override
public List<String> getChunkList(String content, String kid) {
return null;
}
}

View File

@@ -1,16 +0,0 @@
package org.ruoyi.chain.loader;
import java.io.InputStream;
import java.util.List;
/**
* Stub {@link ResourceLoader} named for GitHub input.
* Neither method is implemented yet: both return null.
*/
public class GithubLoader implements ResourceLoader {
/**
* Not implemented.
*
* @param inputStream ignored
* @return always null
*/
@Override
public String getContent(InputStream inputStream) {
return null;
}
/**
* Not implemented.
*
* @param content ignored
* @param kid ignored
* @return always null
*/
@Override
public List<String> getChunkList(String content, String kid) {
return null;
}
}

View File

@@ -1,16 +0,0 @@
package org.ruoyi.chain.loader;
import java.io.InputStream;
import java.util.List;
/**
* Stub {@link ResourceLoader} named for JSON input.
* Neither method is implemented yet: both return null.
*/
public class JsonFileLoader implements ResourceLoader {
/**
* Not implemented.
*
* @param inputStream ignored
* @return always null
*/
@Override
public String getContent(InputStream inputStream) {
return null;
}
/**
* Not implemented.
*
* @param content ignored
* @param kid ignored
* @return always null
*/
@Override
public List<String> getChunkList(String content, String kid) {
return null;
}
}

View File

@@ -1,39 +0,0 @@
package org.ruoyi.chain.loader;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.ruoyi.chain.split.TextSplitter;
import org.springframework.stereotype.Component;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.List;
@Component
@AllArgsConstructor
@Slf4j
public class MarkDownFileLoader implements ResourceLoader {
private final TextSplitter textSplitter;
@Override
public String getContent(InputStream inputStream) {
StringBuffer stringBuffer = new StringBuffer();
try (InputStreamReader reader = new InputStreamReader(inputStream);
BufferedReader bufferedReader = new BufferedReader(reader)) {
String line;
while ((line = bufferedReader.readLine()) != null) {
stringBuffer.append(line).append("\n");
}
} catch (IOException e) {
e.printStackTrace();
}
return stringBuffer.toString();
}
@Override
public List<String> getChunkList(String content, String kid) {
return textSplitter.split(content, kid);
}
}

View File

@@ -1,35 +0,0 @@
package org.ruoyi.chain.loader;
import lombok.AllArgsConstructor;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.ruoyi.chain.split.TextSplitter;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
/**
 * {@link ResourceLoader} for PDF files, using Apache PDFBox for plain-text extraction.
 */
@Component
@AllArgsConstructor
public class PdfFileLoader implements ResourceLoader {

    private final TextSplitter characterTextSplitter;

    /**
     * Extracts the text of the PDF read from the given stream.
     *
     * @param inputStream stream holding the PDF bytes
     * @return the text extracted by {@link PDFTextStripper}
     * @throws RuntimeException wrapping the {@link IOException} if the document cannot be
     *                          loaded, read or closed
     */
    @Override
    public String getContent(InputStream inputStream) {
        // PDDocument holds parser buffers/scratch files; it must be closed, so it is managed by
        // try-with-resources (the original never closed it).
        try (PDDocument document = PDDocument.load(inputStream)) {
            return new PDFTextStripper().getText(document);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Splits the content into chunks using the configured splitter.
     *
     * @param content text to split
     * @param kid     knowledge base id, passed through to the splitter
     * @return whatever the splitter returns for this content
     */
    @Override
    public List<String> getChunkList(String content, String kid) {
        return characterTextSplitter.split(content, kid);
    }
}

View File

@@ -1,442 +0,0 @@
package org.ruoyi.chain.loader;
import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.DocumentParser;
import dev.langchain4j.data.document.loader.FileSystemDocumentLoader;
import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser;
import dev.langchain4j.data.message.AiMessage;
import dev.langchain4j.data.message.ImageContent;
import dev.langchain4j.data.message.TextContent;
import dev.langchain4j.data.message.UserMessage;
import dev.langchain4j.model.chat.response.ChatResponse;
import dev.langchain4j.model.openai.OpenAiChatModel;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.FilenameUtils;
import org.ruoyi.chain.split.TextSplitter;
import org.ruoyi.common.core.utils.file.FileUtils;
import org.ruoyi.common.oss.core.OssClient;
import org.ruoyi.common.oss.entity.UploadResult;
import org.ruoyi.common.oss.factory.OssFactory;
import org.ruoyi.config.properties.PdfProperties;
import org.ruoyi.system.domain.SysOss;
import org.ruoyi.system.mapper.SysOssMapper;
import org.springframework.stereotype.Component;
import java.io.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Pdf mineru文件加载器
*
* @author zpx
*/
@Slf4j
@Component
@AllArgsConstructor
public class PdfMinerUFileLoader implements ResourceLoader {
// 预编译正则表达式
private static final Pattern MD_IMAGE_PATTERN = Pattern.compile("!\\[(.*?)]\\((.*?)(\\s*=\\d+)?\\)");
private final TextSplitter characterTextSplitter;
private final PdfProperties properties;
private final SysOssMapper sysOssMapper;
// OCR图片识别线程池
private final ThreadPoolExecutor ocrExecutor = new ThreadPoolExecutor(
// 核心线程数
5,
// 最大线程数
10,
// 空闲线程存活时间
60L, TimeUnit.SECONDS,
// 任务队列容量
new LinkedBlockingQueue<>(100),
// 拒绝策略
new ThreadPoolExecutor.CallerRunsPolicy()
);
/**
* 创建临时PDF文件
*
* @param is 输入流
* @return
* @throws IOException
*/
private static File createTempFile(InputStream is) throws IOException {
File tempFile = File.createTempFile("upload_", ".pdf");
Files.copy(is, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
return tempFile;
}
/**
* 构建跨平台文件输出路径
*
* @return
* @throws IOException
*/
private static Path buildOutputPath() throws IOException {
Path basePath = isWindows() ?
// Windows C盘用户路径下 minerUOutPut避免其他盘符权限问题
Paths.get(System.getProperty("user.home")).resolve("minerUOutPut") :
Paths.get("/var/minerUOutPut");
if (!Files.exists(basePath)) {
Files.createDirectories(basePath);
}
return basePath;
}
/**
* 判断当前操作系统是否为Windows
*
* @return
*/
private static boolean isWindows() {
return System.getProperty("os.name").toLowerCase().contains("win");
}
/**
* 执行命令
*
* @param condaEnv conda环境路径
* @param inputFile 输入文件
* @param outputPath 输出路径
* @return
* @throws IOException
*/
private static Process buildProcess(String condaEnv, File inputFile, Path outputPath) throws IOException {
ProcessBuilder pb = new ProcessBuilder();
String[] command;
if (isWindows()) {
command = new String[]{
"cmd", "/c",
"call", "conda", "activate",
condaEnv.replace("\"", ""),
"&&", "magic-pdf",
"-p", inputFile.getAbsolutePath(),
"-o", outputPath.toString()
};
} else {
command = new String[]{
"bash", "-c",
String.format("source '%s/bin/activate' && magic-pdf -p '%s' -o '%s'",
condaEnv,
inputFile.getAbsolutePath(),
outputPath.toString())
};
}
return pb.command(command)
.redirectErrorStream(true)
.start();
}
/**
* 实时日志输出
*
* @param process 进程
*/
private static void logProcessOutput(Process process) {
Executors.newSingleThreadExecutor().submit(() -> {
try (BufferedReader reader = new BufferedReader(
new InputStreamReader(process.getInputStream()))) {
String line;
while ((line = reader.readLine()) != null) {
log.info("[PROCESS LOG] " + line);
}
} catch (IOException e) {
e.printStackTrace();
}
});
}
/**
* 验证转换结果
*
* @param inputFile 输入文件
* @param outputPath 输出路径
* @param exitCode 退出码
* @return
*/
private static String verifyResult(File inputFile, Path outputPath, int exitCode) {
String baseName = FilenameUtils.removeExtension(inputFile.getName());
Path expectedMd = outputPath
.resolve(baseName)
.resolve("auto")
.resolve(baseName + ".md");
if (exitCode == 0 && Files.exists(expectedMd)) {
log.info("转换成功:{}", expectedMd.toString());
return expectedMd.toString();
}
return String.format("转换失败(退出码%d| 预期文件:%s", exitCode, expectedMd);
}
/**
* 多模态OCR识别图片内容
*
* @param imageUrl 图片URL
* @return
*/
private static String imageUrlOCR(String imageUrl) {
OpenAiChatModel model = OpenAiChatModel.builder()
.apiKey("demo")
.modelName("gpt-4o-mini")
.baseUrl("http://langchain4j.dev/demo/openai/v1")
.build();
UserMessage userMessage = UserMessage.from(
TextContent.from(
"请按以下逻辑处理图片:\n" +
"1. 文字检测:识别图中所有可见文字(包括水印/标签若无文字则跳至步骤3\n" +
"2. 文字处理:\n" +
" a. 对识别到的文字进行❗核心信息提炼\n" +
" b. ❗禁止直接输出原文内容\n" +
" c. 描述文字位置(如'顶部居中')、字体特征(颜色/大小)\n" +
"3. 视觉描述:\n" +
" a. 若无文字则用❗50字内简洁描述主体对象、场景、色彩搭配与画面氛围\n" +
" b. 若有文字则补充说明文字与画面的关系\n" +
"4. 输出规则:\n" +
" - 最终输出为纯文本,格式:'[文字总结] 视觉描述 关键词xx,xx'\n" +
" - 关键词从内容中提取3个最具代表性的名词\n" +
" - 无文字时格式:'[空] 简洁描述 关键词xx,xx'"
),
ImageContent.from(imageUrl)
);
ChatResponse chat = model.chat(userMessage);
AiMessage answer = chat.aiMessage();
return answer.text();
}
/**
* 清理输出目录
*
* @param outputPath 输出目录
*/
private static void cleanOutputDirectory(Path outputPath) {
if (Files.exists(outputPath)) {
try {
Files.walk(outputPath)
// 按逆序删除(子目录先删)
.sorted((p1, p2) -> -p1.compareTo(p2))
.forEach(path -> {
try {
Files.delete(path);
} catch (IOException e) {
log.warn("清理输出目录失败: {}", path, e);
}
});
} catch (IOException e) {
log.error("遍历输出目录失败", e);
}
}
}
@Override
public String getContent(InputStream inputStream) {
String content = "";
File tempPdf = null;
Path outputPath = null;
try {
// 创建临时文件
tempPdf = createTempFile(inputStream);
//构建输出路径
outputPath = buildOutputPath();
// 执行转换命令
Process process = buildProcess(properties.getTransition().getCondaEnvPath(), tempPdf, outputPath);
//打印执行日志
logProcessOutput(process);
int exitCode = process.waitFor();
//验证转换结果
String verifyResult = verifyResult(tempPdf, outputPath, exitCode);
// 获取生成的.md文件路径
Path mdFilePath = Paths.get(verifyResult);
if (Files.exists(mdFilePath)) {
log.info("找到Markdown文件: " + mdFilePath);
DocumentParser documentParser = new ApacheTikaDocumentParser();
Document document = FileSystemDocumentLoader.loadDocument(mdFilePath.toString(), documentParser);
if (null != document) {
content = document.text();
// 判断是否md文档
String fileType = FilenameUtils.getExtension(mdFilePath.getFileName().toString());
//判断是否需要进行图片OCR识别
if ("md".contains(fileType) && properties.getTransition().isEnableOcr()) {
// 如果是md文件查找所有图片语法如果是本地图片替换成网络图片
StringBuffer sb = replaceImageUrl(content, mdFilePath);
content = sb.toString();
}
} else {
log.warn("无法解析文档内容");
}
} else {
log.warn("未找到预期的 .md 文件");
}
return content;
} catch (Exception e) {
throw new RuntimeException(e);
} finally {
if (tempPdf != null) {
try {
// 清理临时文件
Files.deleteIfExists(tempPdf.toPath());
} catch (IOException e) {
log.warn("删除临时文件失败: {}", tempPdf.getAbsolutePath(), e);
}
}
//清理输出目录
if (outputPath != null) {
cleanOutputDirectory(outputPath);
}
}
}
@Override
public List<String> getChunkList(String content, String kid) {
return characterTextSplitter.split(content, kid);
}
/**
* 正则匹配图片语法,多线程进行处理
*
* @param content 文本内容
* @param basePath 图片路径
* @return
*/
private StringBuffer replaceImageUrl(String content, Path basePath) throws Exception {
List<ImageMatch> matches = new ArrayList<>();
Matcher matcher = MD_IMAGE_PATTERN.matcher(content);
// 收集所有匹配的图片项
while (matcher.find()) {
ImageMatch imgMatch = new ImageMatch();
imgMatch.altText = matcher.group(1);
imgMatch.imageUrl = matcher.group(2);
imgMatch.start = matcher.start();
imgMatch.end = matcher.end();
matches.add(imgMatch);
}
if (matches.isEmpty()) {
return new StringBuffer(content);
}
// 提交任务到线程池
List<Future<String>> futures = new ArrayList<>();
for (ImageMatch imgMatch : matches) {
// 为每个图片项创建独立任务
Future<String> future = ocrExecutor.submit(() -> processImage(imgMatch, basePath));
futures.add(future);
}
// 按原始顺序拼接结果
StringBuffer sb = new StringBuffer();
int previousEnd = 0;
for (int i = 0; i < matches.size(); i++) {
ImageMatch imgMatch = matches.get(i);
// 阻塞等待结果
String replacement = futures.get(i).get();
// 插入未匹配的原始文本和处理后的结果
sb.append(content.substring(previousEnd, imgMatch.start));
sb.append(replacement);
previousEnd = imgMatch.end;
}
// 添加剩余文本
sb.append(content.substring(previousEnd));
return sb;
}
/**
* 图片处理任务
*
* @param imgMatch 图片匹配结果
* @param basePath 本地图片路径
* @return
*/
private String processImage(ImageMatch imgMatch, Path basePath) {
try {
if (!imgMatch.imageUrl.startsWith("http")) {
// 处理本地图片
Path imagePath = basePath.getParent().resolve(imgMatch.imageUrl).normalize();
if (!Files.exists(imagePath)) {
log.error("图片路径不存在: {}", imagePath);
return String.format("![%s](%s)", imgMatch.altText, imgMatch.imageUrl);
}
// 文件后缀安全提取
String originalFileName = imagePath.getFileName().toString();
String suffix = "";
int lastDotIndex = originalFileName.lastIndexOf(".");
if (lastDotIndex != -1) {
suffix = originalFileName.substring(lastDotIndex);
}
// 上传OSS
try (InputStream inputStream = Files.newInputStream(imagePath)) {
OssClient storage = OssFactory.instance();
UploadResult uploadResult = storage.uploadSuffix(inputStream, suffix, FileUtils.getMimeType(suffix));
// 保存数据库记录
SysOss sysOss = new SysOss();
sysOss.setUrl(uploadResult.getUrl());
sysOss.setFileSuffix(suffix);
sysOss.setFileName(uploadResult.getFilename());
sysOss.setOriginalName(originalFileName);
sysOss.setService(storage.getConfigKey());
sysOssMapper.insert(sysOss);
// OCR处理
String networkUrl = uploadResult.getUrl();
//⚠️ 注意:确保 URL 是公网可访问的,否则模型无法加载图片。
//另一种解决方案使用base64 但是需要申请apikey , 使用demo会出现token超出长度问题。
String ocrResult = safeImageUrlOCR(networkUrl);
return String.format("![%s%s](%s)", imgMatch.altText, ocrResult, networkUrl);
}
} else {
// 处理远程图片
String ocrResult = safeImageUrlOCR(imgMatch.imageUrl);
return String.format("![%s%s](%s)", imgMatch.altText, ocrResult, imgMatch.imageUrl);
}
} catch (Exception e) {
log.error("图片处理失败: {}", imgMatch.imageUrl, e);
return String.format("![%s](%s)", imgMatch.altText, imgMatch.imageUrl);
}
}
/**
* OCR调用
*
* @param imageUrl 图片URL
* @return
*/
private String safeImageUrlOCR(String imageUrl) {
try {
return imageUrlOCR(imageUrl);
} catch (Exception e) {
log.warn("OCR处理失败: {}", imageUrl, e);
// OCR失败时返回空字符串
return "";
}
}
/**
* 静态内部类保存图片匹配信息
*/
private static class ImageMatch {
String altText; // 替换文本
String imageUrl; // 图片地址
int start; // 匹配起始位置
int end; // 匹配结束位置
}
}

View File

@@ -1,14 +0,0 @@
package org.ruoyi.chain.loader;
import java.io.InputStream;
import java.util.List;
/**
* Resource loader: turns an input stream into text and splits that text into chunks.
*/
public interface ResourceLoader {
/**
* Reads the content of a resource as text.
*
* @param inputStream stream holding the resource
* @return the text content
*/
String getContent(InputStream inputStream);
/**
* Splits text content into chunks.
*
* @param content text to split
* @param kid knowledge base id
* @return the list of chunks
*/
List<String> getChunkList(String content, String kid);
}

View File

@@ -1,41 +0,0 @@
package org.ruoyi.chain.loader;
import lombok.AllArgsConstructor;
import org.ruoyi.chain.split.*;
import org.ruoyi.config.properties.PdfProperties;
import org.ruoyi.constant.FileType;
import org.ruoyi.system.mapper.SysOssMapper;
import org.springframework.stereotype.Component;
@AllArgsConstructor
@Component
public class ResourceLoaderFactory {
private final CharacterTextSplitter characterTextSplitter;
private final CodeTextSplitter codeTextSplitter;
private final MarkdownTextSplitter markdownTextSplitter;
private final TokenTextSplitter tokenTextSplitter;
private final ExcelTextSplitter excelTextSplitter;
private final PdfProperties pdfProperties;
private final SysOssMapper sysOssMapper;
public ResourceLoader getLoaderByFileType(String fileType) {
if (FileType.isTextFile(fileType)) {
return new TextFileLoader(characterTextSplitter);
} else if (FileType.isWord(fileType)) {
return new WordLoader(characterTextSplitter);
} else if (FileType.isPdf(fileType) && pdfProperties.getTransition().isEnableMinerU()) {
return new PdfMinerUFileLoader(characterTextSplitter, pdfProperties, sysOssMapper);
} else if (FileType.isPdf(fileType)) {
return new PdfFileLoader(characterTextSplitter);
} else if (FileType.isMdFile(fileType)) {
return new MarkDownFileLoader(markdownTextSplitter);
} else if (FileType.isCodeFile(fileType)) {
return new CodeFileLoader(codeTextSplitter);
} else if (FileType.isExcel(fileType)) {
return new ExcelFileLoader(excelTextSplitter);
} else {
return new TextFileLoader(characterTextSplitter);
}
}
}

View File

@@ -1,38 +0,0 @@
package org.ruoyi.chain.loader;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.ruoyi.chain.split.TextSplitter;
import org.springframework.stereotype.Component;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.stream.Collectors;
@Component
@AllArgsConstructor
@Slf4j
public class TextFileLoader implements ResourceLoader {
private final TextSplitter textSplitter;
@Override
public String getContent(InputStream inputStream) {
String stringBuffer = "";
try (InputStreamReader reader = new InputStreamReader(inputStream, StandardCharsets.UTF_8);
BufferedReader bufferedReader = new BufferedReader(reader)) {
stringBuffer = bufferedReader.lines().collect(Collectors.joining());
} catch (IOException e) {
e.printStackTrace();
}
return stringBuffer;
}
@Override
public List<String> getChunkList(String content, String kid) {
return textSplitter.split(content, kid);
}
}

View File

@@ -1,38 +0,0 @@
package org.ruoyi.chain.loader;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.ruoyi.chain.split.TextSplitter;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
/**
 * {@link ResourceLoader} for Word documents, using Apache POI's XWPF (OOXML) API.
 *
 * <p>NOTE(review): FileType.isWord also accepts "doc", but only the XWPF reader is used here;
 * confirm how legacy binary {@code .doc} files are expected to be handled.
 */
@Component
@AllArgsConstructor
@Slf4j
public class WordLoader implements ResourceLoader {

    private final TextSplitter textSplitter;

    /**
     * Extracts the text of the Word document read from the given stream.
     *
     * @param inputStream stream holding the document
     * @return the text extracted by {@link XWPFWordExtractor}
     * @throws RuntimeException wrapping the {@link IOException} if the document cannot be
     *                          opened or closed
     */
    @Override
    public String getContent(InputStream inputStream) {
        // The document keeps the unpacked package open until closed (the original never closed it).
        try (XWPFDocument document = new XWPFDocument(inputStream)) {
            return new XWPFWordExtractor(document).getText();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Splits the content into chunks using the configured splitter.
     *
     * @param content text to split
     * @param kid     knowledge base id, passed through to the splitter
     * @return whatever the splitter returns for this content
     */
    @Override
    public List<String> getChunkList(String content, String kid) {
        return textSplitter.split(content, kid);
    }
}

View File

@@ -1,63 +0,0 @@
package org.ruoyi.chain.split;
import jakarta.annotation.Resource;
import lombok.extern.slf4j.Slf4j;
import org.ruoyi.common.core.utils.StringUtils;
import org.ruoyi.domain.vo.KnowledgeInfoVo;
import org.ruoyi.service.IKnowledgeInfoService;
import org.springframework.context.annotation.Lazy;
import org.springframework.context.annotation.Primary;
import org.springframework.stereotype.Component;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Default {@link TextSplitter}: splits by the knowledge base's custom separator when the text
 * contains it, otherwise into fixed-size, overlapping character blocks.
 *
 * <p>Separator, block size and overlap are read from the knowledge base record on every call.
 */
@Component
@Slf4j
@Primary
public class CharacterTextSplitter implements TextSplitter {

    @Lazy
    @Resource
    private IKnowledgeInfoService knowledgeInfoService;

    /**
     * Splits the content according to the configuration of the given knowledge base.
     *
     * @param content text to split; {@code null} or empty yields an empty list
     * @param kid     knowledge base id (decimal string)
     * @return the chunks, in document order
     * @throws NumberFormatException    if {@code kid} is not a valid long
     * @throws IllegalArgumentException if no knowledge base exists for {@code kid}
     */
    @Override
    public List<String> split(String content, String kid) {
        KnowledgeInfoVo knowledgeInfoVo = knowledgeInfoService.queryById(Long.valueOf(kid));
        if (knowledgeInfoVo == null) {
            throw new IllegalArgumentException("Knowledge base not found, kid=" + kid);
        }
        String knowledgeSeparator = knowledgeInfoVo.getKnowledgeSeparator();
        int textBlockSize = knowledgeInfoVo.getTextBlockSize();
        int overlapChar = knowledgeInfoVo.getOverlapChar();

        List<String> chunkList = new ArrayList<>();
        if (content == null || content.isEmpty()) {
            return chunkList;
        }

        // Check for a blank separator first: calling contains() with a null separator throws.
        if (StringUtils.isNotBlank(knowledgeSeparator) && content.contains(knowledgeSeparator)) {
            // The separator is literal text (contains() matches it literally), so quote it;
            // otherwise separators such as "|" or "." are interpreted as regular expressions.
            String[] chunks = content.split(java.util.regex.Pattern.quote(knowledgeSeparator));
            chunkList.addAll(Arrays.asList(chunks));
            return chunkList;
        }

        if (textBlockSize <= 0) {
            // A non-positive block size would never advance through the text (endless loop).
            log.warn("Invalid textBlockSize {} for knowledge base {}; returning content as one chunk",
                    textBlockSize, kid);
            chunkList.add(content);
            return chunkList;
        }

        // A negative overlap could put a block's start after its end.
        int overlap = Math.max(overlapChar, 0);
        int len = content.length();
        // long arithmetic so that very large block sizes cannot overflow the offsets.
        for (long offset = 0; offset < len; offset += textBlockSize) {
            int begin = (int) Math.max(offset - overlap, 0L);
            int end = (int) Math.min(offset + textBlockSize + overlap, (long) len);
            chunkList.add(content.substring(begin, end));
        }
        return chunkList;
    }
}

View File

@@ -1,17 +0,0 @@
package org.ruoyi.chain.split;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import java.util.List;
/**
* {@link TextSplitter} named for source code.
* Not implemented yet: {@link #split} always returns null.
*/
@Component
@AllArgsConstructor
@Slf4j
public class CodeTextSplitter implements TextSplitter {
/**
* Not implemented.
*
* @param content ignored
* @param kid ignored
* @return always null
*/
@Override
public List<String> split(String content, String kid) {
return null;
}
}

View File

@@ -1,55 +0,0 @@
package org.ruoyi.chain.split;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.ruoyi.common.core.utils.StringUtils;
import org.springframework.stereotype.Component;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * {@link TextSplitter} for Excel content, using fixed built-in settings instead of the
 * knowledge base configuration.
 */
@Component
@AllArgsConstructor
@Slf4j
public class ExcelTextSplitter implements TextSplitter {

    /**
     * Splits on {@code "#"} when the content contains it; otherwise cuts the content into
     * blocks of 10000 characters, each widened by 500 characters on both sides.
     *
     * @param content text to split
     * @param kid     knowledge base id (not used by this splitter)
     * @return the chunks, in document order
     */
    @Override
    public List<String> split(String content, String kid) {
        // Built-in defaults.
        final String separator = "#";
        final int blockSize = 10000;
        final int overlap = 500;

        if (content.contains(separator) && StringUtils.isNotBlank(separator)) {
            // Separator present: split on it.
            return new ArrayList<>(Arrays.asList(content.split(separator)));
        }

        // No separator: walk the text block by block, clamping each window to the text bounds.
        List<String> pieces = new ArrayList<>();
        final int total = content.length();
        for (int offset = 0; offset < total; offset += blockSize) {
            int from = Math.max(offset - overlap, 0);
            int to = Math.min(offset + blockSize + overlap, total);
            pieces.add(content.substring(from, to));
        }
        return pieces;
    }
}

View File

@@ -1,17 +0,0 @@
package org.ruoyi.chain.split;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import java.util.List;
/**
* {@link TextSplitter} named for Markdown.
* Not implemented yet: {@link #split} always returns null.
*/
@Component
@AllArgsConstructor
@Slf4j
public class MarkdownTextSplitter implements TextSplitter {
/**
* Not implemented.
*
* @param content ignored
* @param kid ignored
* @return always null
*/
@Override
public List<String> split(String content, String kid) {
return null;
}
}

View File

@@ -1,18 +0,0 @@
package org.ruoyi.chain.split;
import java.util.List;
/**
* Text splitter: cuts text content into chunks.
*/
public interface TextSplitter {
/**
* Splits text into chunks.
*
* @param content text content
* @param kid knowledge base id
* @return the list of chunks produced from the text
*/
List<String> split(String content, String kid);
}

View File

@@ -1,17 +0,0 @@
package org.ruoyi.chain.split;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import java.util.List;
/**
* {@link TextSplitter} named for token-based splitting.
* Not implemented yet: {@link #split} always returns null.
*/
@Component
@AllArgsConstructor
@Slf4j
public class TokenTextSplitter implements TextSplitter {
/**
* Not implemented.
*
* @param content ignored
* @param kid ignored
* @return always null
*/
@Override
public List<String> split(String content, String kid) {
return null;
}
}

View File

@@ -1,83 +0,0 @@
package org.ruoyi.config.properties;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;
/**
 * PDF configuration properties, bound from configuration keys under the {@code pdf} prefix.
 *
 * @author zpx
 */
@Data
@Component
@ConfigurationProperties(prefix = "pdf")
public class PdfProperties {

    /**
     * Extract configuration ({@code pdf.extract.*}); {@code null} when the section is absent.
     */
    private ExtractConfig extract;

    /**
     * Transition configuration ({@code pdf.transition.*}).
     *
     * <p>Starts as an all-defaults instance (MinerU and OCR disabled, no conda path) so that
     * callers reading {@code getTransition().isEnableMinerU()} do not hit a
     * NullPointerException when the section is missing from the configuration.
     */
    private TransitionConfig transition = new TransitionConfig();

    /**
     * Settings under {@code pdf.extract}.
     */
    @Data
    @NoArgsConstructor
    public static class ExtractConfig {

        /**
         * Service configuration.
         */
        private ServiceConfig service;

        /**
         * AI API configuration.
         */
        private AiApiConfig aiApi;

        /**
         * Settings under {@code pdf.extract.service}.
         */
        @Data
        @NoArgsConstructor
        public static class ServiceConfig {

            /**
             * Service URL.
             */
            private String url;
        }

        /**
         * Settings under {@code pdf.extract.ai-api}.
         */
        @Data
        @NoArgsConstructor
        public static class AiApiConfig {

            /**
             * AI API URL.
             */
            private String url;

            /**
             * API key.
             */
            private String key;
        }
    }

    /**
     * Settings under {@code pdf.transition}.
     */
    @Data
    @NoArgsConstructor
    public static class TransitionConfig {

        /**
         * Whether MinerU conversion is enabled.
         */
        private boolean enableMinerU;

        /**
         * Path of the conda environment MinerU runs in.
         */
        private String condaEnvPath;

        /**
         * Whether image OCR is enabled.
         */
        private boolean enableOcr;
    }
}

View File

@@ -1,96 +0,0 @@
package org.ruoyi.constant;
/**
 * File type (extension) constants and case-insensitive classification helpers.
 *
 * <p>All {@code isXxx} methods return {@code false} for a {@code null} type.
 *
 * <p>NOTE(review): a few values are language names rather than the usual extensions
 * ("ruby", "perl", "shell"); confirm whether "rb", "pl" and "sh" should match as well.
 */
public class FileType {
    public static final String TXT = "txt";
    public static final String CSV = "csv";
    public static final String MD = "md";
    public static final String DOC = "doc";
    public static final String DOCX = "docx";
    public static final String PDF = "pdf";
    public static final String XLS = "xls";
    public static final String XLSX = "xlsx";
    public static final String LOG = "log";
    public static final String XML = "xml";
    public static final String JAVA = "java";
    public static final String HTML = "html";
    public static final String HTM = "htm";
    public static final String CSS = "css";
    public static final String JS = "js";
    public static final String PY = "py";
    public static final String CPP = "cpp";
    public static final String SQL = "sql";
    public static final String PHP = "php";
    public static final String RUBY = "ruby";
    public static final String C = "c";
    public static final String H = "h";
    public static final String HPP = "hpp";
    public static final String SWIFT = "swift";
    public static final String TS = "ts";
    public static final String RUST = "rs";
    public static final String PERL = "perl";
    public static final String SHELL = "shell";
    public static final String BAT = "bat";
    public static final String CMD = "cmd";
    public static final String PROPERTIES = "properties";
    public static final String INI = "ini";
    public static final String YAML = "yaml";
    public static final String YML = "yml";

    /**
     * @param type file type to test, may be {@code null}
     * @return {@code true} for txt, csv, properties, ini, yaml, yml, log and xml
     */
    public static boolean isTextFile(String type) {
        return matchesAny(type, TXT, CSV, PROPERTIES, INI, YAML, YML, LOG, XML);
    }

    /**
     * @param type file type to test, may be {@code null}
     * @return {@code true} for the source-code and script types declared in this class
     */
    public static boolean isCodeFile(String type) {
        return matchesAny(type, JAVA, HTML, HTM, JS, PY, CPP, SQL, PHP, RUBY, C, H, HPP, SWIFT,
                TS, RUST, PERL, SHELL, BAT, CMD, CSS);
    }

    /**
     * @param type file type to test, may be {@code null}
     * @return {@code true} for md
     */
    public static boolean isMdFile(String type) {
        return matchesAny(type, MD);
    }

    /**
     * @param type file type to test, may be {@code null}
     * @return {@code true} for doc and docx
     */
    public static boolean isWord(String type) {
        return matchesAny(type, DOC, DOCX);
    }

    /**
     * @param type file type to test, may be {@code null}
     * @return {@code true} for pdf
     */
    public static boolean isPdf(String type) {
        return matchesAny(type, PDF);
    }

    /**
     * @param type file type to test, may be {@code null}
     * @return {@code true} for xls and xlsx
     */
    public static boolean isExcel(String type) {
        return matchesAny(type, XLS, XLSX);
    }

    /** Case-insensitive membership test; a {@code null} type matches nothing. */
    private static boolean matchesAny(String type, String... candidates) {
        if (type == null) {
            return false;
        }
        for (String candidate : candidates) {
            if (type.equalsIgnoreCase(candidate)) {
                return true;
            }
        }
        return false;
    }
}

View File

@@ -1,83 +0,0 @@
package org.ruoyi.domain;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import lombok.Data;
import lombok.EqualsAndHashCode;
import org.ruoyi.core.domain.BaseEntity;
import java.io.Serial;
/**
* Knowledge base attachment entity, mapped to table knowledge_attach.
*
* @author ageerle
* @date 2025-04-08
*/
@Data
@EqualsAndHashCode(callSuper = true)
@TableName("knowledge_attach")
public class KnowledgeAttach extends BaseEntity {
@Serial
private static final long serialVersionUID = 1L;
/**
* Primary key (column id).
*/
@TableId(value = "id")
private Long id;
/**
* Knowledge base ID
*/
private String kid;
/**
* Document ID
*/
private String docId;
/**
* Document name
*/
private String docName;
/**
* Document type
*/
private String docType;
/**
* Document content
*/
private String content;
/**
* Remark
*/
private String remark;
/**
* Object storage primary key
*/
private Long ossId;
/**
* Image extraction status: 10 = not started, 20 = in progress, 30 = completed
*/
private Integer picStatus;
/**
* Image analysis status: 10 = not started, 20 = in progress, 30 = completed
*/
private Integer picAnysStatus;
/**
* Vector database write status: 10 = not started, 20 = in progress, 30 = completed
*/
private Integer vectorStatus;
}

View File

@@ -1,62 +0,0 @@
package org.ruoyi.domain;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import lombok.Data;
import lombok.EqualsAndHashCode;
import org.ruoyi.core.domain.BaseEntity;
import java.io.Serial;
/**
* Knowledge fragment entity, mapped to table knowledge_fragment.
*
* @author ageerle
* @date 2025-04-08
*/
@Data
@EqualsAndHashCode(callSuper = true)
@TableName("knowledge_fragment")
public class KnowledgeFragment extends BaseEntity {
@Serial
private static final long serialVersionUID = 1L;
/**
* Primary key (column id).
*/
@TableId(value = "id")
private Long id;
/**
* Knowledge base ID
*/
private String kid;
/**
* Document ID
*/
private String docId;
/**
* Knowledge fragment ID
*/
private String fid;
/**
* Index of the fragment
*/
private Integer idx;
/**
* Document content
*/
private String content;
/**
* Remark
*/
private String remark;
}

View File

@@ -1,107 +0,0 @@
package org.ruoyi.domain;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import lombok.Data;
import lombok.EqualsAndHashCode;
import org.ruoyi.core.domain.BaseEntity;
import java.io.Serial;
/**
* Knowledge base entity, mapped to table knowledge_info.
*
* @author ageerle
* @date 2025-04-08
*/
@Data
@EqualsAndHashCode(callSuper = true)
@TableName("knowledge_info")
public class KnowledgeInfo extends BaseEntity {
@Serial
private static final long serialVersionUID = 1L;
/**
* Primary key (column id).
*/
@TableId(value = "id")
private Long id;
/**
* Knowledge base ID
*/
private String kid;
/**
* User ID
*/
private Long uid;
/**
* Knowledge base name
*/
private String kname;
/**
* Whether the knowledge base is public: 0 = no, 1 = yes
*/
private Integer share;
/**
* Description
*/
private String description;
/**
* Knowledge separator
*/
private String knowledgeSeparator;
/**
* Question separator
*/
private String questionSeparator;
/**
* Number of overlapping characters
*/
private Long overlapChar;
/**
* Number of entries retrieved from the knowledge base
*/
private Long retrieveLimit;
/**
* Text block size
*/
private Long textBlockSize;
/**
* Vector store model name
*/
private String vectorModelName;
/**
* Embedding model ID
*/
private Long embeddingModelId;
/**
* Embedding model name
*/
private String embeddingModelName;
/**
* System prompt
*/
private String systemPrompt;
/**
* Remark
*/
private String remark;
}

View File

@@ -1,61 +0,0 @@
package org.ruoyi.domain;
import com.baomidou.mybatisplus.annotation.TableField;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import lombok.Data;
import lombok.EqualsAndHashCode;
import org.ruoyi.core.domain.BaseEntity;
import java.io.Serial;
import java.util.List;
/**
* Knowledge base role entity, mapped to table knowledge_role.
*
* @author ageerle
* @date 2025-07-19
*/
@Data
@EqualsAndHashCode(callSuper = true)
@TableName("knowledge_role")
public class KnowledgeRole extends BaseEntity {
@Serial
private static final long serialVersionUID = 1L;
/**
* Knowledge base role ID
*/
@TableId(value = "id")
private Long id;
/**
* Knowledge base role group ID
*/
private Long groupId;
/**
* Knowledge base role name
*/
private String name;
/**
* Deletion flag: 0 = present, 2 = deleted.
* The logical-delete annotation below is commented out, so it is not active on this field.
*/
// @TableLogic
private String delFlag;
/**
* Remark
*/
private String remark;
/**
* List of knowledge base IDs; not a table column (exist = false).
*/
@TableField(exist = false)
private List<Long> knowledgeIds;
}

View File

@@ -1,48 +0,0 @@
package org.ruoyi.domain;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import lombok.Data;
import lombok.EqualsAndHashCode;
import org.ruoyi.core.domain.BaseEntity;
import java.io.Serial;
/**
* Knowledge base role group entity, mapped to table knowledge_role_group.
*
* @author ageerle
* @date 2025-07-19
*/
@Data
@EqualsAndHashCode(callSuper = true)
@TableName("knowledge_role_group")
public class KnowledgeRoleGroup extends BaseEntity {
@Serial
private static final long serialVersionUID = 1L;
/**
* Knowledge base role group ID
*/
@TableId(value = "id")
private Long id;
/**
* Knowledge base role group name
*/
private String name;
/**
* Deletion flag: 0 = present, 2 = deleted.
* The logical-delete annotation below is commented out, so it is not active on this field.
*/
// @TableLogic
private String delFlag;
/**
* Remark
*/
private String remark;
}

View File

@@ -1,54 +0,0 @@
package org.ruoyi.domain;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableLogic;
import com.baomidou.mybatisplus.annotation.TableName;
import lombok.Data;
import lombok.EqualsAndHashCode;
import org.ruoyi.core.domain.BaseEntity;
import java.io.Serial;
/**
* Association between a knowledge base role and a knowledge base,
* mapped to table knowledge_role_relation.
*
* @author ageerle
* @date 2025-07-19
*/
@Data
@EqualsAndHashCode(callSuper = true)
@TableName("knowledge_role_relation")
public class KnowledgeRoleRelation extends BaseEntity {
@Serial
private static final long serialVersionUID = 1L;
/**
* Primary key (column id).
*/
@TableId(value = "id")
private Long id;
/**
* Deletion flag: 0 = present, 2 = deleted; marked as the logical-delete field.
*/
@TableLogic
private String delFlag;
/**
* Remark
*/
private String remark;
/**
* Knowledge base role ID
*/
private Long knowledgeRoleId;
/**
* Knowledge base ID
*/
private Long knowledgeId;
}

View File

@@ -1,92 +0,0 @@
package org.ruoyi.domain.bo;
import io.github.linpeilie.annotations.AutoMapper;
import jakarta.validation.constraints.NotBlank;
import jakarta.validation.constraints.NotNull;
import lombok.Data;
import lombok.EqualsAndHashCode;
import org.ruoyi.common.core.validate.AddGroup;
import org.ruoyi.common.core.validate.EditGroup;
import org.ruoyi.core.domain.BaseEntity;
import org.ruoyi.domain.KnowledgeAttach;
/**
* Knowledge base attachment business object (knowledge_attach); auto-mapped to KnowledgeAttach.
*
* @author ageerle
* @date 2025-04-08
*/
@Data
@EqualsAndHashCode(callSuper = true)
@AutoMapper(target = KnowledgeAttach.class, reverseConvertGenerate = false)
public class KnowledgeAttachBo extends BaseEntity {
/**
* Record id; required in the edit validation group.
*/
@NotNull(message = "不能为空", groups = {EditGroup.class})
private Long id;
/**
* Knowledge base ID
*/
@NotBlank(message = "知识库ID不能为空", groups = {AddGroup.class, EditGroup.class})
private String kid;
/**
* Document ID
*/
@NotBlank(message = "文档ID不能为空", groups = {AddGroup.class, EditGroup.class})
private String docId;
/**
* Document name
*/
@NotBlank(message = "文档名称不能为空", groups = {AddGroup.class, EditGroup.class})
private String docName;
/**
* Document type
*/
@NotBlank(message = "文档类型不能为空", groups = {AddGroup.class, EditGroup.class})
private String docType;
/**
* Document content
*/
@NotBlank(message = "文档内容不能为空", groups = {AddGroup.class, EditGroup.class})
private String content;
/**
* Remark
*/
@NotBlank(message = "备注不能为空", groups = {AddGroup.class, EditGroup.class})
private String remark;
/**
* Object storage primary key
*/
@NotNull(message = "对象存储主键不能为空", groups = {AddGroup.class, EditGroup.class})
private Long ossId;
/**
* Image extraction status: 10 = not started, 20 = in progress, 30 = completed
*/
@NotNull(message = "拆解图片状态10未开始20进行中30已完成不能为空", groups = {AddGroup.class, EditGroup.class})
private Integer picStatus;
/**
* Image analysis status: 10 = not started, 20 = in progress, 30 = completed
*/
@NotNull(message = "分析图片状态10未开始20进行中30已完成不能为空", groups = {AddGroup.class, EditGroup.class})
private Integer picAnysStatus;
/**
* Vector database write status: 10 = not started, 20 = in progress, 30 = completed
*/
@NotNull(message = "写入向量数据库状态10未开始20进行中30已完成不能为空", groups = {AddGroup.class, EditGroup.class})
private Integer vectorStatus;
}

View File

@@ -1,67 +0,0 @@
package org.ruoyi.domain.bo;
import io.github.linpeilie.annotations.AutoMapper;
import jakarta.validation.constraints.NotBlank;
import jakarta.validation.constraints.NotNull;
import lombok.Data;
import lombok.EqualsAndHashCode;
import org.ruoyi.common.core.validate.AddGroup;
import org.ruoyi.common.core.validate.EditGroup;
import org.ruoyi.core.domain.BaseEntity;
import org.ruoyi.domain.KnowledgeFragment;
/**
* Knowledge fragment business object (knowledge_fragment); auto-mapped to KnowledgeFragment.
*
* @author ageerle
* @date 2025-04-08
*/
@Data
@EqualsAndHashCode(callSuper = true)
@AutoMapper(target = KnowledgeFragment.class, reverseConvertGenerate = false)
public class KnowledgeFragmentBo extends BaseEntity {
/**
* Record id; required in the edit validation group.
*/
@NotNull(message = "不能为空", groups = {EditGroup.class})
private Long id;
/**
* Knowledge base ID
*/
@NotBlank(message = "知识库ID不能为空", groups = {AddGroup.class, EditGroup.class})
private String kid;
/**
* Document ID
*/
@NotBlank(message = "文档ID不能为空", groups = {AddGroup.class, EditGroup.class})
private String docId;
/**
* Knowledge fragment ID
*/
@NotBlank(message = "知识片段ID不能为空", groups = {AddGroup.class, EditGroup.class})
private String fid;
/**
* Index of the fragment
*/
@NotNull(message = "片段索引下标不能为空", groups = {AddGroup.class, EditGroup.class})
private Long idx;
/**
* Document content
*/
@NotBlank(message = "文档内容不能为空", groups = {AddGroup.class, EditGroup.class})
private String content;
/**
* Remark
*/
@NotBlank(message = "备注不能为空", groups = {AddGroup.class, EditGroup.class})
private String remark;
}

View File

@@ -1,113 +0,0 @@
package org.ruoyi.domain.bo;
import io.github.linpeilie.annotations.AutoMapper;
import jakarta.validation.constraints.NotBlank;
import jakarta.validation.constraints.NotNull;
import lombok.Data;
import lombok.EqualsAndHashCode;
import org.ruoyi.common.core.validate.AddGroup;
import org.ruoyi.common.core.validate.EditGroup;
import org.ruoyi.core.domain.BaseEntity;
import org.ruoyi.domain.KnowledgeInfo;
/**
* Knowledge base business object (knowledge_info); auto-mapped to KnowledgeInfo.
*
* @author ageerle
* @date 2025-04-08
*/
@Data
@EqualsAndHashCode(callSuper = true)
@AutoMapper(target = KnowledgeInfo.class, reverseConvertGenerate = false)
public class KnowledgeInfoBo extends BaseEntity {
/**
* Primary key; required in the edit validation group.
*/
@NotNull(message = "不能为空", groups = {EditGroup.class})
private Long id;
/**
* Knowledge base ID; required in the edit validation group only.
*/
@NotBlank(message = "知识库ID不能为空", groups = {EditGroup.class})
private String kid;
/**
* User ID; required in the edit validation group only.
*/
@NotNull(message = "用户ID不能为空", groups = {EditGroup.class})
private Long uid;
/**
* Knowledge base name
*/
@NotBlank(message = "知识库名称不能为空", groups = {AddGroup.class, EditGroup.class})
private String kname;
/**
* Whether the knowledge base is public: 0 = no, 1 = yes
*/
@NotNull(message = "是否公开知识库0 否 1是不能为空", groups = {AddGroup.class, EditGroup.class})
private Integer share;
/**
* Description
*/
private String description;
/**
* Knowledge separator
*/
private String knowledgeSeparator;
/**
* Question separator
*/
private String questionSeparator;
/**
* Number of overlapping characters
*/
private Long overlapChar;
/**
* Number of entries retrieved from the knowledge base
*/
@NotNull(message = "知识库中检索的条数不能为空", groups = {AddGroup.class, EditGroup.class})
private Long retrieveLimit;
/**
* Text block size
*/
@NotNull(message = "文本块大小不能为空", groups = {AddGroup.class, EditGroup.class})
private Long textBlockSize;
/**
* Vector store model name
*/
@NotBlank(message = "向量库不能为空", groups = {AddGroup.class, EditGroup.class})
private String vectorModelName;
/**
* Embedding model ID
*/
private Long embeddingModelId;
/**
* Embedding model name
*/
private String embeddingModelName;
/**
* System prompt
*/
private String systemPrompt;
/**
* Remark
*/
private String remark;
}

View File

@@ -1,16 +0,0 @@
package org.ruoyi.domain.bo;
import lombok.Data;
import org.springframework.web.multipart.MultipartFile;
/**
* Upload request object pairing a kid value with a multipart file.
*
* @author ageer
*/
@Data
public class KnowledgeInfoUploadBo {
// presumably the knowledge base ID, as kid is documented elsewhere in this module — confirm
private String kid;
// The uploaded file.
private MultipartFile file;
}

View File

@@ -1,54 +0,0 @@
package org.ruoyi.domain.bo;
import io.github.linpeilie.annotations.AutoMapper;
import jakarta.validation.constraints.NotBlank;
import jakarta.validation.constraints.NotNull;
import lombok.Data;
import lombok.EqualsAndHashCode;
import org.ruoyi.common.core.validate.AddGroup;
import org.ruoyi.common.core.validate.EditGroup;
import org.ruoyi.core.domain.BaseEntity;
import org.ruoyi.domain.KnowledgeRole;
import java.util.List;
/**
* Knowledge base role business object (knowledge_role); auto-mapped to KnowledgeRole.
*
* @author ageerle
* @date 2025-07-19
*/
@Data
@EqualsAndHashCode(callSuper = true)
@AutoMapper(target = KnowledgeRole.class, reverseConvertGenerate = false)
public class KnowledgeRoleBo extends BaseEntity {
/**
* Knowledge base role ID; required in the edit validation group.
*/
@NotNull(message = "知识库角色id不能为空", groups = {EditGroup.class})
private Long id;
/**
* Knowledge base role group ID
*/
@NotNull(message = "知识库角色组id不能为空", groups = {AddGroup.class, EditGroup.class})
private Long groupId;
/**
* Knowledge base role name
*/
@NotBlank(message = "知识库角色name不能为空", groups = {AddGroup.class, EditGroup.class})
private String name;
/**
* Remark
*/
private String remark;
/**
* List of knowledge base IDs
*/
private List<Long> knowledgeIds;
}

View File

@@ -1,42 +0,0 @@
package org.ruoyi.domain.bo;
import io.github.linpeilie.annotations.AutoMapper;
import jakarta.validation.constraints.NotBlank;
import jakarta.validation.constraints.NotNull;
import lombok.Data;
import lombok.EqualsAndHashCode;
import org.ruoyi.common.core.validate.AddGroup;
import org.ruoyi.common.core.validate.EditGroup;
import org.ruoyi.core.domain.BaseEntity;
import org.ruoyi.domain.KnowledgeRoleGroup;
/**
* Knowledge base role group business object (knowledge_role_group); auto-mapped to KnowledgeRoleGroup.
*
* @author ageerle
* @date 2025-07-19
*/
@Data
@EqualsAndHashCode(callSuper = true)
@AutoMapper(target = KnowledgeRoleGroup.class, reverseConvertGenerate = false)
public class KnowledgeRoleGroupBo extends BaseEntity {
/**
* Knowledge base role group ID; required in the edit validation group.
*/
@NotNull(message = "知识库角色组id不能为空", groups = {EditGroup.class})
private Long id;
/**
* Knowledge base role group name
*/
@NotBlank(message = "知识库角色组name不能为空", groups = {AddGroup.class, EditGroup.class})
private String name;
/**
* Remark
*/
private String remark;
}

View File

@@ -1,49 +0,0 @@
package org.ruoyi.domain.bo;
import io.github.linpeilie.annotations.AutoMapper;
import jakarta.validation.constraints.NotBlank;
import jakarta.validation.constraints.NotNull;
import lombok.Data;
import lombok.EqualsAndHashCode;
import org.ruoyi.common.core.validate.AddGroup;
import org.ruoyi.common.core.validate.EditGroup;
import org.ruoyi.core.domain.BaseEntity;
import org.ruoyi.domain.KnowledgeRoleRelation;
/**
* Business object for the association between a knowledge base role and a knowledge base
* (knowledge_role_relation); auto-mapped to KnowledgeRoleRelation.
*
* @author ageerle
* @date 2025-07-19
*/
@Data
@EqualsAndHashCode(callSuper = true)
@AutoMapper(target = KnowledgeRoleRelation.class, reverseConvertGenerate = false)
public class KnowledgeRoleRelationBo extends BaseEntity {
/**
* Record id; required in the edit validation group.
*/
@NotNull(message = "id不能为空", groups = {EditGroup.class})
private Long id;
/**
* Remark
*/
@NotBlank(message = "备注不能为空", groups = {AddGroup.class, EditGroup.class})
private String remark;
/**
* Knowledge base role ID
*/
@NotNull(message = "知识库角色id不能为空", groups = {AddGroup.class, EditGroup.class})
private Long knowledgeRoleId;
/**
* Knowledge base ID
*/
@NotNull(message = "知识库id不能为空", groups = {AddGroup.class, EditGroup.class})
private Long knowledgeId;
}

View File

@@ -1,54 +0,0 @@
package org.ruoyi.domain.bo;
import lombok.Data;
/**
* Parameters required for a vector query.
*
* @author ageer
*/
@Data
public class QueryVectorBo {
/**
* Query content
*/
private String query;
/**
* Knowledge base kid
*/
private String kid;
/**
* Number of results to return from the vector query
*/
private Integer maxResults;
/**
* Vector store model name
*/
private String vectorModelName;
/**
* Embedding model ID
*/
private Long embeddingModelId;
/**
* Embedding model name
*/
private String embeddingModelName;
/**
* Request API key
*/
private String apiKey;
/**
* Request base URL
*/
private String baseUrl;
}

View File

@@ -1,60 +0,0 @@
package org.ruoyi.domain.bo;
import lombok.Data;
import java.util.List;
/**
* Parameters required for storing embeddings.
*
* @author ageer
*/
@Data
public class StoreEmbeddingBo {
/**
* List of split text chunks
*/
private List<String> chunkList;
/**
* Knowledge base kid
*/
private String kid;
/**
* Document ID
*/
private String docId;
/**
* List of knowledge chunk IDs
*/
private List<String> fids;
/**
* Vector store name
*/
private String vectorStoreName;
/**
* Embedding model ID
*/
private Long embeddingModelId;
/**
* Embedding model name
*/
private String embeddingModelName;
/**
* Request API key
*/
private String apiKey;
/**
* Request base URL
*/
private String baseUrl;
}

View File

@@ -1,93 +0,0 @@
package org.ruoyi.domain.vo;
import com.alibaba.excel.annotation.ExcelIgnoreUnannotated;
import com.alibaba.excel.annotation.ExcelProperty;
import io.github.linpeilie.annotations.AutoMapper;
import lombok.Data;
import org.ruoyi.domain.KnowledgeAttach;
import java.io.Serial;
import java.io.Serializable;
/**
* Knowledge base attachment view object (knowledge_attach); auto-mapped with KnowledgeAttach.
* Only fields annotated with ExcelProperty take part in Excel handling (ExcelIgnoreUnannotated).
*
* @author ageerle
* @date 2025-04-08
*/
@Data
@ExcelIgnoreUnannotated
@AutoMapper(target = KnowledgeAttach.class)
public class KnowledgeAttachVo implements Serializable {
@Serial
private static final long serialVersionUID = 1L;
/**
* Record id (declared with an empty Excel header).
*/
@ExcelProperty(value = "")
private Long id;
/**
* Knowledge base ID
*/
@ExcelProperty(value = "知识库ID")
private String kid;
/**
* Document ID
*/
@ExcelProperty(value = "文档ID")
private String docId;
/**
* Document name
*/
@ExcelProperty(value = "文档名称")
private String docName;
/**
* Document type
*/
@ExcelProperty(value = "文档类型")
private String docType;
/**
* Document content
*/
@ExcelProperty(value = "文档内容")
private String content;
/**
* Remark
*/
@ExcelProperty(value = "备注")
private String remark;
/**
* Object storage primary key
*/
@ExcelProperty(value = "对象存储主键")
private Long ossId;
/**
* Image extraction status: 10 = not started, 20 = in progress, 30 = completed
*/
@ExcelProperty(value = "拆解图片状态10未开始20进行中30已完成")
private Integer picStatus;
/**
* Image analysis status: 10 = not started, 20 = in progress, 30 = completed
*/
@ExcelProperty(value = "分析图片状态10未开始20进行中30已完成")
private Integer picAnysStatus;
/**
* Vector database write status: 10 = not started, 20 = in progress, 30 = completed
*/
@ExcelProperty(value = "写入向量数据库状态10未开始20进行中30已完成")
private Integer vectorStatus;
}

View File

@@ -1,71 +0,0 @@
package org.ruoyi.domain.vo;
import com.alibaba.excel.annotation.ExcelIgnoreUnannotated;
import com.alibaba.excel.annotation.ExcelProperty;
import io.github.linpeilie.annotations.AutoMapper;
import lombok.Data;
import org.ruoyi.domain.KnowledgeFragment;
import java.io.Serial;
import java.io.Serializable;
/**
* Knowledge fragment view object (knowledge_fragment); auto-mapped with KnowledgeFragment.
* Only fields annotated with ExcelProperty take part in Excel handling (ExcelIgnoreUnannotated).
*
* @author ageerle
* @date 2025-04-08
*/
@Data
@ExcelIgnoreUnannotated
@AutoMapper(target = KnowledgeFragment.class)
public class KnowledgeFragmentVo implements Serializable {
@Serial
private static final long serialVersionUID = 1L;
/**
* Record id (declared with an empty Excel header).
*/
@ExcelProperty(value = "")
private Long id;
/**
* Knowledge base ID
*/
@ExcelProperty(value = "知识库ID")
private String kid;
/**
* Document ID
*/
@ExcelProperty(value = "文档ID")
private String docId;
/**
* Knowledge fragment ID
*/
@ExcelProperty(value = "知识片段ID")
private String fid;
/**
* Index of the fragment
*/
@ExcelProperty(value = "片段索引下标")
private Long idx;
/**
* Document content
*/
@ExcelProperty(value = "文档内容")
private String content;
/**
* Remark
*/
@ExcelProperty(value = "备注")
private String remark;
}

View File

@@ -1,125 +0,0 @@
package org.ruoyi.domain.vo;
import com.alibaba.excel.annotation.ExcelIgnoreUnannotated;
import com.alibaba.excel.annotation.ExcelProperty;
import io.github.linpeilie.annotations.AutoMapper;
import lombok.Data;
import org.ruoyi.common.excel.annotation.ExcelDictFormat;
import org.ruoyi.common.excel.convert.ExcelDictConvert;
import org.ruoyi.domain.KnowledgeInfo;
import java.io.Serial;
import java.io.Serializable;
/**
 * Knowledge base view object (knowledge_info); auto-mapped with KnowledgeInfo.
 * Only fields annotated with ExcelProperty take part in Excel handling (ExcelIgnoreUnannotated).
 *
 * @author ageerle
 * @date 2025-04-08
 */
@Data
@ExcelIgnoreUnannotated
@AutoMapper(target = KnowledgeInfo.class)
public class KnowledgeInfoVo implements Serializable {
    @Serial
    private static final long serialVersionUID = 1L;

    /**
     * Record id (declared with an empty Excel header).
     */
    @ExcelProperty(value = "")
    private Long id;

    /**
     * Knowledge base ID
     */
    @ExcelProperty(value = "知识库ID")
    private String kid;

    /**
     * User ID
     */
    @ExcelProperty(value = "用户ID")
    private Long uid;

    /**
     * Knowledge base name
     */
    @ExcelProperty(value = "知识库名称")
    private String kname;

    /**
     * Whether the knowledge base is public: 0 = no, 1 = yes.
     * The dictionary expression must be comma-separated key=label pairs; the previous
     * value "0=,否=,1=是" left key 0 without a label and introduced a bogus key.
     */
    @ExcelProperty(value = "是否公开知识库", converter = ExcelDictConvert.class)
    @ExcelDictFormat(readConverterExp = "0=否,1=是")
    private Integer share;

    /**
     * Description
     */
    @ExcelProperty(value = "描述")
    private String description;

    /**
     * Knowledge separator
     */
    @ExcelProperty(value = "知识分隔符")
    private String knowledgeSeparator;

    /**
     * Question separator
     */
    @ExcelProperty(value = "提问分隔符")
    private String questionSeparator;

    /**
     * Number of overlapping characters
     */
    @ExcelProperty(value = "重叠字符数")
    private Integer overlapChar;

    /**
     * Number of entries retrieved from the knowledge base
     */
    @ExcelProperty(value = "知识库中检索的条数")
    private Integer retrieveLimit;

    /**
     * Text block size
     */
    @ExcelProperty(value = "文本块大小")
    private Integer textBlockSize;

    /**
     * Vector store model name
     */
    private String vectorModelName;

    /**
     * Embedding model ID
     */
    private Long embeddingModelId;

    /**
     * Embedding model name
     */
    private String embeddingModelName;

    /**
     * System prompt
     */
    private String systemPrompt;

    /**
     * Remark
     */
    @ExcelProperty(value = "备注")
    private String remark;
}

View File

@@ -1,71 +0,0 @@
package org.ruoyi.domain.vo;
import com.alibaba.excel.annotation.ExcelIgnoreUnannotated;
import com.alibaba.excel.annotation.ExcelProperty;
import io.github.linpeilie.annotations.AutoMapper;
import lombok.Data;
import org.ruoyi.domain.KnowledgeRoleGroup;
import java.io.Serial;
import java.io.Serializable;
import java.util.Date;
/**
 * Knowledge base role group view object (knowledge_role_group); auto-mapped with KnowledgeRoleGroup.
 * Only fields annotated with ExcelProperty take part in Excel handling (ExcelIgnoreUnannotated).
 *
 * @author ageerle
 * @date 2025-07-19
 */
@Data
@ExcelIgnoreUnannotated
@AutoMapper(target = KnowledgeRoleGroup.class)
public class KnowledgeRoleGroupVo implements Serializable {
    @Serial
    private static final long serialVersionUID = 1L;

    /**
     * Knowledge base role group ID
     */
    @ExcelProperty(value = "知识库角色组id")
    private Long id;

    /**
     * Knowledge base role group name
     */
    @ExcelProperty(value = "知识库角色组名称")
    private String name;

    /**
     * Creator
     */
    @ExcelProperty(value = "创建者")
    private Long createBy;

    /**
     * Creation time
     */
    @ExcelProperty(value = "创建时间")
    private Date createTime;

    /**
     * Updater. The header previously duplicated the update-time header ("更新时间"),
     * producing two identically named columns; it now reads "更新者" (updater).
     */
    @ExcelProperty(value = "更新者")
    private Long updateBy;

    /**
     * Update time
     */
    @ExcelProperty(value = "更新时间")
    private Date updateTime;

    /**
     * Remark
     */
    @ExcelProperty(value = "备注")
    private String remark;
}

View File

@@ -1,52 +0,0 @@
package org.ruoyi.domain.vo;
import com.alibaba.excel.annotation.ExcelIgnoreUnannotated;
import com.alibaba.excel.annotation.ExcelProperty;
import io.github.linpeilie.annotations.AutoMapper;
import lombok.Data;
import org.ruoyi.domain.KnowledgeRoleRelation;
import java.io.Serial;
import java.io.Serializable;
/**
* View object for the association between a knowledge base role and a knowledge base
* (knowledge_role_relation); auto-mapped with KnowledgeRoleRelation.
* Only fields annotated with ExcelProperty take part in Excel handling (ExcelIgnoreUnannotated).
*
* @author ageerle
* @date 2025-07-19
*/
@Data
@ExcelIgnoreUnannotated
@AutoMapper(target = KnowledgeRoleRelation.class)
public class KnowledgeRoleRelationVo implements Serializable {
@Serial
private static final long serialVersionUID = 1L;
/**
* Record id
*/
@ExcelProperty(value = "id")
private Long id;
/**
* Remark
*/
@ExcelProperty(value = "备注")
private String remark;
/**
* Knowledge base role ID
*/
@ExcelProperty(value = "知识库角色id")
private Long knowledgeRoleId;
/**
* Knowledge base ID
*/
@ExcelProperty(value = "知识库id")
private Long knowledgeId;
}

View File

@@ -1,89 +0,0 @@
package org.ruoyi.domain.vo;
import com.alibaba.excel.annotation.ExcelIgnoreUnannotated;
import com.alibaba.excel.annotation.ExcelProperty;
import io.github.linpeilie.annotations.AutoMapper;
import lombok.Data;
import org.ruoyi.domain.KnowledgeRole;
import java.io.Serial;
import java.io.Serializable;
import java.util.Date;
import java.util.List;
/**
 * Knowledge base role view object (knowledge_role); auto-mapped with KnowledgeRole.
 * Only fields annotated with ExcelProperty take part in Excel handling (ExcelIgnoreUnannotated).
 *
 * @author ageerle
 * @date 2025-07-19
 */
@Data
@ExcelIgnoreUnannotated
@AutoMapper(target = KnowledgeRole.class)
public class KnowledgeRoleVo implements Serializable {
    @Serial
    private static final long serialVersionUID = 1L;

    /**
     * Knowledge base role ID
     */
    @ExcelProperty(value = "知识库角色id")
    private Long id;

    /**
     * Knowledge base role group ID
     */
    @ExcelProperty(value = "知识库角色组id")
    private Long groupId;

    /**
     * Knowledge base role name
     */
    @ExcelProperty(value = "知识库角色name")
    private String name;

    /**
     * Creator
     */
    @ExcelProperty(value = "创建者")
    private Long createBy;

    /**
     * Creation time
     */
    @ExcelProperty(value = "创建时间")
    private Date createTime;

    /**
     * Updater. The header previously duplicated the update-time header ("更新时间"),
     * producing two identically named columns; it now reads "更新者" (updater).
     */
    @ExcelProperty(value = "更新者")
    private Long updateBy;

    /**
     * Update time
     */
    @ExcelProperty(value = "更新时间")
    private Date updateTime;

    /**
     * Remark
     */
    @ExcelProperty(value = "备注")
    private String remark;

    /**
     * List of knowledge base IDs (not annotated for Excel)
     */
    private List<Long> knowledgeIds;

    /**
     * Role group name (not annotated for Excel)
     */
    private String groupName;
}

View File

@@ -1,28 +0,0 @@
package org.ruoyi.embedding;
import dev.langchain4j.model.embedding.EmbeddingModel;
import org.ruoyi.domain.vo.ChatModelVo;
import org.ruoyi.embedding.model.ModalityType;
import java.util.Set;
/**
* Base embedding model service; extends the EmbeddingModel interface with
* configuration and modality-discovery methods.
*/
public interface BaseEmbedModelService extends EmbeddingModel {
/**
* Configures the embedding model from the given settings.
*
* @param config ChatModelVo object carrying the model configuration
*/
void configure(ChatModelVo config);
/**
* Returns all modality types supported by this embedding model.
*
* @return the set of supported modality types
*/
Set<ModalityType> getSupportedModalities();
}

View File

@@ -1,120 +0,0 @@
package org.ruoyi.embedding;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.ruoyi.domain.vo.ChatModelVo;
import org.ruoyi.service.IChatModelService;
import org.springframework.beans.factory.NoSuchBeanDefinitionException;
import org.springframework.context.ApplicationContext;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
/**
 * Factory service that creates, configures and caches embedding model instances.
 *
 * <p>Instances are cached by embedding model name. A reverse index from model id to
 * cached name is kept so that {@link #refreshModel(Long)} can evict by id.
 *
 * <p>NOTE(review): instances are obtained from the Spring context by provider name and
 * then configured in place. If provider beans are singletons, two model names that share
 * a provider would share (and reconfigure) one instance — confirm the intended bean scope.
 */
@Service
@RequiredArgsConstructor
@Slf4j
public class EmbeddingModelFactory {
    private final ApplicationContext applicationContext;
    private final IChatModelService chatModelService;
    // Model cache keyed by embedding model name; ConcurrentHashMap for thread safety.
    private final Map<String, BaseEmbedModelService> modelCache = new ConcurrentHashMap<>();
    // Reverse index: String.valueOf(model id) -> key used in modelCache.
    private final Map<String, String> modelNameById = new ConcurrentHashMap<>();

    /**
     * Returns the embedding model for the given name, creating and caching it on first use.
     * The {@code dimension} argument is only consulted when the instance is first created;
     * later calls return the cached instance unchanged.
     *
     * @param embeddingModelName embedding model name (cache key)
     * @param dimension          requested vector dimension, or null to keep the configured one
     * @return the cached or newly created model instance
     * @throws IllegalArgumentException when no configuration exists for the name or the
     *                                  provider bean cannot be found
     */
    public BaseEmbedModelService createModel(String embeddingModelName, Integer dimension) {
        return modelCache.computeIfAbsent(embeddingModelName, name -> {
            ChatModelVo modelConfig = chatModelService.selectModelByName(name);
            if (modelConfig == null) {
                throw new IllegalArgumentException("未找到模型配置name=" + name);
            }
            // Never replace a configured dimension with null: isMultimodalModel and
            // createMultimodalModel call this method with dimension == null.
            if (dimension != null && modelConfig.getDimension() != null) {
                modelConfig.setDimension(dimension);
            }
            BaseEmbedModelService model = createModelInstance(modelConfig.getProviderName(), modelConfig);
            // Remember which cache key belongs to this model id so refreshModel can evict it.
            Object modelId = modelConfig.getId();
            if (modelId != null) {
                modelNameById.put(String.valueOf(modelId), name);
            }
            return model;
        });
    }

    /**
     * Checks whether the named model supports multimodal embedding.
     * Creates and caches the model if it is not cached yet.
     *
     * @param embeddingModelName embedding model name
     * @return true when the model is a MultiModalEmbedModelService
     */
    public boolean isMultimodalModel(String embeddingModelName) {
        return createModel(embeddingModelName, null) instanceof MultiModalEmbedModelService;
    }

    /**
     * Returns the named model as a multimodal embedding service.
     *
     * @param embeddingModelName embedding model name
     * @return the multimodal embedding model service
     * @throws IllegalArgumentException when the model does not support multimodal embedding
     */
    public MultiModalEmbedModelService createMultimodalModel(String embeddingModelName) {
        BaseEmbedModelService model = createModel(embeddingModelName, null);
        if (model instanceof MultiModalEmbedModelService) {
            return (MultiModalEmbedModelService) model;
        }
        throw new IllegalArgumentException("该模型不支持多模态");
    }

    /**
     * Evicts the cached model that was created for the given model id, so the next
     * createModel call rebuilds it. Does nothing when the id is null or no model with
     * that id has been cached.
     *
     * <p>The cache is keyed by model name, so removing by the Long id directly (as the
     * previous implementation did) never matched any entry.
     *
     * @param embeddingModelId unique id of the embedding model
     */
    public void refreshModel(Long embeddingModelId) {
        if (embeddingModelId == null) {
            return;
        }
        String cachedName = modelNameById.remove(String.valueOf(embeddingModelId));
        if (cachedName != null) {
            modelCache.remove(cachedName);
        }
    }

    /**
     * Lists the bean names of all registered BaseEmbedModelService implementations.
     *
     * @return supported factory (bean) names
     */
    public List<String> getSupportedFactories() {
        return new ArrayList<>(applicationContext.getBeansOfType(BaseEmbedModelService.class)
            .keySet());
    }

    /**
     * Looks up the provider bean named {@code factory} and configures it.
     *
     * @param factory bean name identifying the provider
     * @param config  model configuration
     * @return the configured model instance
     * @throws IllegalArgumentException when no bean with that name exists
     */
    private BaseEmbedModelService createModelInstance(String factory, ChatModelVo config) {
        try {
            // Fetch the provider from the Spring context, then apply the configuration.
            BaseEmbedModelService model = applicationContext.getBean(factory, BaseEmbedModelService.class);
            model.configure(config);
            log.info("成功创建嵌入模型: factory={}, modelId={}", config.getProviderName(), config.getId());
            return model;
        } catch (NoSuchBeanDefinitionException e) {
            throw new IllegalArgumentException("获取不到嵌入模型: " + factory, e);
        }
    }
}

View File

@@ -1,37 +0,0 @@
package org.ruoyi.embedding;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.model.output.Response;
import org.ruoyi.embedding.model.MultiModalInput;
/**
* Multimodal embedding model service; extends the base embedding model service
* with methods that turn images, videos and mixed-modality input into embedding vectors.
*/
public interface MultiModalEmbedModelService extends BaseEmbedModelService {
/**
* Converts image data into an embedding vector.
*
* @param imageDataUrl address of the image; must be a publicly accessible URL
* @return response object containing the embedding vector
*/
Response<Embedding> embedImage(String imageDataUrl);
/**
* Converts video data into an embedding vector.
*
* @param videoDataUrl address of the video; must be a publicly accessible URL
* @return response object containing the embedding vector
*/
Response<Embedding> embedVideo(String videoDataUrl);
/**
* Processes multimodal input and returns an embedding vector.
*
* @param input input object carrying content of several modalities (such as image and text)
* @return response object containing the embedding vector for the input
*/
Response<Embedding> embedMultiModal(MultiModalInput input);
}

View File

@@ -1,50 +0,0 @@
package org.ruoyi.embedding.impl;
import dev.langchain4j.community.model.dashscope.QwenEmbeddingModel;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.output.Response;
import org.ruoyi.domain.vo.ChatModelVo;
import org.ruoyi.embedding.model.ModalityType;
import org.springframework.stereotype.Component;
import java.util.List;
import java.util.Set;
/**
 * Alibaba Bailian basic (text) embedding provider, OpenAI-compatible;
 * registered as bean "alibailian".
 *
 * @Author: Robust_H
 * @Date: 2025-09-30 3:00 PM
 */
@Component("alibailian")
public class AliBaiLianBaseEmbedProvider extends OpenAiEmbeddingProvider {
    private ChatModelVo chatModelVo;

    /**
     * Stores the configuration used by later embedAll calls.
     *
     * @param config model configuration
     */
    @Override
    public void configure(ChatModelVo config) {
        this.chatModelVo = config;
    }

    /**
     * @return an empty set; this provider declares no modality types
     */
    @Override
    public Set<ModalityType> getSupportedModalities() {
        return Set.of();
    }

    /**
     * Embeds all text segments with a QwenEmbeddingModel built from the stored configuration.
     * A new model object is built on every call.
     *
     * @param textSegments segments to embed
     * @return the embeddings returned by the Qwen model
     */
    @Override
    public Response<List<Embedding>> embedAll(List<TextSegment> textSegments) {
        // TODO: test settings, to be changed later — the configured API host
        // (chatModelVo.getApiHost()) and dimension (chatModelVo.getDimension())
        // are not applied yet; the dimension is fixed at 1024.
        QwenEmbeddingModel qwenModel = QwenEmbeddingModel.builder()
            .apiKey(chatModelVo.getApiKey())
            .modelName(chatModelVo.getModelName())
            .dimension(1024)
            .build();
        return qwenModel.embedAll(textSegments);
    }
}

View File

@@ -1,292 +0,0 @@
package org.ruoyi.embedding.impl;
import com.fasterxml.jackson.databind.ObjectMapper;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.output.Response;
import dev.langchain4j.model.output.TokenUsage;
import lombok.extern.slf4j.Slf4j;
import okhttp3.*;
import org.ruoyi.domain.vo.ChatModelVo;
import org.ruoyi.embedding.MultiModalEmbedModelService;
import org.ruoyi.embedding.model.AliyunMultiModalEmbedRequest;
import org.ruoyi.embedding.model.AliyunMultiModalEmbedResponse;
import org.ruoyi.embedding.model.ModalityType;
import org.ruoyi.embedding.model.MultiModalInput;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.TimeUnit;
/**
* 阿里云百炼多模态嵌入模型服务实现类
* 实现了MultiModalEmbedModelService接口提供文本、图像和视频的嵌入向量生成服务
*/
@Component("bailianMultiModel")
@Slf4j
public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelService {
private final OkHttpClient okHttpClient;
private ChatModelVo chatModelVo;
/**
 * Creates the provider and its HTTP client with a 30 s connect timeout,
 * a 60 s read timeout and a 30 s write timeout.
 */
public AliBaiLianMultiEmbeddingProvider() {
    OkHttpClient.Builder clientBuilder = new OkHttpClient.Builder();
    clientBuilder.connectTimeout(30, TimeUnit.SECONDS);
    clientBuilder.readTimeout(60, TimeUnit.SECONDS);
    clientBuilder.writeTimeout(30, TimeUnit.SECONDS);
    this.okHttpClient = clientBuilder.build();
}
/**
* Generates an embedding vector for an image by delegating to
* embedSingleModality with the "image" key.
*
* @param imageDataUrl URL of the image data
* @return Response containing the image embedding
*/
@Override
public Response<Embedding> embedImage(String imageDataUrl) {
return embedSingleModality("image", imageDataUrl);
}
/**
* Generates an embedding vector for a video by delegating to
* embedSingleModality with the "video" key.
*
* @param videoDataUrl URL of the video data
* @return Response containing the video embedding
*/
@Override
public Response<Embedding> embedVideo(String videoDataUrl) {
return embedSingleModality("video", videoDataUrl);
}
/**
 * Generates one embedding for input that may combine several modalities.
 * Only the first embedding in the service response is returned; an empty
 * response yields a zero-length embedding and a warning log.
 *
 * @param input object carrying the content of the various modalities
 * @return Response containing the embedding and the token usage of the call
 * @throws IllegalArgumentException when no content is supplied or the call fails;
 *         every exception raised inside the method is logged and rewrapped
 */
@Override
public Response<Embedding> embedMultiModal(MultiModalInput input) {
    try {
        List<Map<String, Object>> parts = buildContentMap(input);
        if (parts.isEmpty()) {
            throw new IllegalArgumentException("至少提供一种模态的内容");
        }
        // Build the request, call the service, convert the payload to embeddings.
        AliyunMultiModalEmbedRequest apiRequest = buildRequest(parts, chatModelVo);
        AliyunMultiModalEmbedResponse apiResponse = executeRequest(apiRequest, chatModelVo);
        Response<List<Embedding>> converted = toEmbeddings(apiResponse);
        List<Embedding> vectors = converted.content();
        if (!vectors.isEmpty()) {
            // The first vector represents the whole multimodal input.
            return Response.from(vectors.get(0), converted.tokenUsage());
        }
        log.warn("阿里云混合模态嵌入返回为空");
        return Response.from(Embedding.from(new float[0]), converted.tokenUsage());
    } catch (Exception e) {
        log.error("阿里云混合模态嵌入失败", e);
        throw new IllegalArgumentException("阿里云混合模态嵌入失败", e);
    }
}
/**
* Stores the model configuration that the embedding methods of this class
* pass to buildRequest and executeRequest.
*
* @param config model configuration
*/
@Override
public void configure(ChatModelVo config) {
this.chatModelVo = config;
}
/**
* Returns the modality types supported by this provider.
*
* @return immutable set containing TEXT, VIDEO and IMAGE
*/
@Override
public Set<ModalityType> getSupportedModalities() {
return Set.of(ModalityType.TEXT, ModalityType.VIDEO, ModalityType.IMAGE);
}
/**
 * Generates embeddings for a batch of text segments in a single request.
 * An empty input list short-circuits to an empty result without any request.
 *
 * @param textSegments text segments to embed
 * @return Response containing the embeddings produced for the segments
 * @throws IllegalArgumentException when the request fails; the cause is logged and attached
 */
@Override
public Response<List<Embedding>> embedAll(List<TextSegment> textSegments) {
    if (textSegments.isEmpty()) {
        return Response.from(Collections.emptyList());
    }
    try {
        // One {"text": ...} entry per segment, in input order.
        List<Map<String, Object>> textParts = new ArrayList<>();
        textSegments.forEach(segment -> textParts.add(Map.of("text", segment.text())));
        AliyunMultiModalEmbedRequest apiRequest = buildRequest(textParts, chatModelVo);
        return toEmbeddings(executeRequest(apiRequest, chatModelVo));
    } catch (Exception e) {
        log.error("阿里云文本嵌入失败", e);
        throw new IllegalArgumentException("阿里云文本嵌入失败", e);
    }
}
/**
 * Shared implementation for embedding one item of a single modality
 * (image, video, or one piece of text).
 *
 * <p>Sends a request with exactly one content entry {@code {key: dataUrl}}
 * and returns the first embedding of the response. When the API returns no
 * embeddings, a warning is logged and a zero-length embedding is returned.
 *
 * @param key     content key naming the modality: image / video / text
 * @param dataUrl data URL (or text) sent as the value for {@code key}
 * @return response wrapping the embedding and the token usage
 * @throws IllegalArgumentException if building, sending or decoding the
 *         request fails (the original failure is attached as the cause)
 */
public Response<Embedding> embedSingleModality(String key, String dataUrl) {
    try {
        List<Map<String, Object>> singleContent = List.of(Map.of(key, dataUrl));
        AliyunMultiModalEmbedResponse apiResponse =
                executeRequest(buildRequest(singleContent, chatModelVo), chatModelVo);
        Response<List<Embedding>> converted = toEmbeddings(apiResponse);
        List<Embedding> vectors = converted.content();
        if (!vectors.isEmpty()) {
            return Response.from(vectors.get(0), converted.tokenUsage());
        }
        log.warn("阿里云 {} 嵌入返回为空", key);
        return Response.from(Embedding.from(new float[0]), converted.tokenUsage());
    } catch (Exception e) {
        log.error("阿里云 {} 嵌入失败", key, e);
        throw new IllegalArgumentException("阿里云 " + key + " 嵌入失败", e);
    }
}
/**
 * Creates the API request object for the given content entries.
 *
 * @param contents    content entries to embed; must not be empty
 * @param chatModelVo model configuration supplying the model name
 * @return the request object created for the configured model name
 * @throws IllegalArgumentException if {@code contents} is empty
 */
private AliyunMultiModalEmbedRequest buildRequest(List<Map<String, Object>> contents, ChatModelVo chatModelVo) {
    if (!contents.isEmpty()) {
        return AliyunMultiModalEmbedRequest.create(chatModelVo.getModelName(), contents);
    }
    throw new IllegalArgumentException("请求内容不能为空");
}
/**
 * Sends the request to the API over HTTP and parses the JSON response.
 *
 * <p>The request is serialized with {@code request.toJson()} and POSTed to
 * the configured API host, with the configured API key sent as a Bearer
 * token in the {@code Authorization} header.
 *
 * @param request     request object to send
 * @param chatModelVo model configuration supplying the API host and API key
 * @return the parsed API response
 * @throws IOException              if the HTTP call or JSON parsing fails
 * @throws IllegalArgumentException if the HTTP response is not successful
 *                                  or has no body
 */
private AliyunMultiModalEmbedResponse executeRequest(AliyunMultiModalEmbedRequest request, ChatModelVo chatModelVo) throws IOException {
    RequestBody payload = RequestBody.create(request.toJson(), MediaType.get("application/json"));

    Request.Builder requestBuilder = new Request.Builder();
    requestBuilder.url(chatModelVo.getApiHost());
    requestBuilder.addHeader("Authorization", "Bearer " + chatModelVo.getApiKey());
    requestBuilder.post(payload);
    Request httpRequest = requestBuilder.build();

    // try-with-resources closes the HTTP response (and its body) on every path.
    try (okhttp3.Response httpResponse = okHttpClient.newCall(httpRequest).execute()) {
        ResponseBody responseBody = httpResponse.body();
        if (!httpResponse.isSuccessful()) {
            // Include the server's error payload in the message when there is one.
            String err = responseBody != null ? responseBody.string() : "无错误信息";
            throw new IllegalArgumentException("API调用失败: " + httpResponse.code() + " - " + err, null);
        }
        if (responseBody == null) {
            throw new IllegalArgumentException("响应体为空", null);
        }
        return parseEmbeddingsFromResponse(responseBody.string());
    }
}
/**
 * Deserializes the API's JSON response body into a response object.
 *
 * <p>A new {@code ObjectMapper} is created on every call.
 *
 * @param responseBody JSON string returned by the API
 * @return the deserialized response object
 * @throws IOException if the JSON cannot be read into the response type
 */
private AliyunMultiModalEmbedResponse parseEmbeddingsFromResponse(String responseBody) throws IOException {
    return new ObjectMapper().readValue(responseBody, AliyunMultiModalEmbedResponse.class);
}
/**
 * Builds the list of content entries for the API request from a
 * multimodal input.
 *
 * <p>Entries are added in a fixed order (text, image, video, multi_images).
 * A string field is skipped when it is {@code null} or blank; the
 * multi-image array is skipped when it is {@code null} or empty.
 *
 * @param input multimodal input object
 * @return list with one single-entry map per modality present; may be empty
 */
private List<Map<String, Object>> buildContentMap(MultiModalInput input) {
    List<Map<String, Object>> contents = new ArrayList<>();

    String text = input.getText();
    if (text != null && !text.isBlank()) {
        contents.add(Map.of("text", text));
    }

    String imageUrl = input.getImageUrl();
    if (imageUrl != null && !imageUrl.isBlank()) {
        contents.add(Map.of("image", imageUrl));
    }

    String videoUrl = input.getVideoUrl();
    if (videoUrl != null && !videoUrl.isBlank()) {
        contents.add(Map.of("video", videoUrl));
    }

    var multiImageUrls = input.getMultiImageUrls();
    if (multiImageUrls != null && multiImageUrls.length > 0) {
        // The array is sent as a list value under a single "multi_images" key.
        contents.add(Map.of("multi_images", Arrays.asList(multiImageUrls)));
    }

    return contents;
}
/**
 * Converts the raw API response into a LangChain4j
 * {@code Response<List<Embedding>>}.
 *
 * <p>Returns an empty list with no token usage when the response, its
 * output, or its embeddings list is {@code null}. Token usage is attached
 * only when the response carries a usage section.
 *
 * @param resp raw API response object; may be {@code null}
 * @return response wrapping the converted embeddings and the token usage
 */
private Response<List<Embedding>> toEmbeddings(AliyunMultiModalEmbedResponse resp) {
    boolean hasEmbeddings = resp != null
            && resp.output() != null
            && resp.output().embeddings() != null;
    if (!hasEmbeddings) {
        return Response.from(Collections.emptyList());
    }

    // Narrow each numeric component to float, as Embedding.from expects a float[].
    List<Embedding> embeddings = resp.output().embeddings().stream()
            .map(item -> {
                int dimension = item.embedding().size();
                float[] vector = new float[dimension];
                for (int i = 0; i < dimension; i++) {
                    vector[i] = item.embedding().get(i).floatValue();
                }
                return Embedding.from(vector);
            })
            .toList();

    // NOTE(review): the image token count is passed as TokenUsage's second
    // constructor argument, and the third is the sum of input and image
    // tokens. Confirm this mapping is the intended one.
    TokenUsage tokenUsage = resp.usage() == null
            ? null
            : new TokenUsage(
                    resp.usage().input_tokens(),
                    resp.usage().image_tokens(),
                    resp.usage().input_tokens() + resp.usage().image_tokens());

    return Response.from(embeddings, tokenUsage);
}
}

View File

@@ -1,43 +0,0 @@
package org.ruoyi.embedding.impl;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.ollama.OllamaEmbeddingModel;
import dev.langchain4j.model.output.Response;
import org.ruoyi.domain.vo.ChatModelVo;
import org.ruoyi.embedding.BaseEmbedModelService;
import org.ruoyi.embedding.model.ModalityType;
import org.springframework.stereotype.Component;
import java.util.List;
import java.util.Set;
/**
 * @Author: Robust_H
 * @Date: 2025-09-30 3:00 PM
 * @Description: Ollama embedding model provider (text modality only)
 */
@Component("ollama")
public class OllamaEmbeddingProvider implements BaseEmbedModelService {

    /** Model configuration set through {@link #configure(ChatModelVo)}. */
    private ChatModelVo chatModelVo;

    /**
     * Stores the model configuration used by {@link #embedAll(List)}.
     *
     * @param config model configuration supplying the API host and model name
     */
    @Override
    public void configure(ChatModelVo config) {
        this.chatModelVo = config;
    }

    /**
     * Reports which modalities this provider can embed.
     *
     * @return immutable set containing only the text modality
     */
    @Override
    public Set<ModalityType> getSupportedModalities() {
        Set<ModalityType> supported = Set.of(ModalityType.TEXT);
        return supported;
    }

    /**
     * Embeds all text segments with an Ollama embedding model built from the
     * stored configuration. A new model instance is built on every call.
     *
     * <p>Ollama cannot set the embedding dimension. When using Milvus, note
     * that the dimension has to be fixed first when the vector table is
     * created.
     *
     * @param textSegments text segments to embed
     * @return the response returned by the Ollama embedding model
     */
    @Override
    public Response<List<Embedding>> embedAll(List<TextSegment> textSegments) {
        OllamaEmbeddingModel model = OllamaEmbeddingModel.builder()
                .baseUrl(chatModelVo.getApiHost())
                .modelName(chatModelVo.getModelName())
                .build();
        return model.embedAll(textSegments);
    }
}

View File

@@ -1,44 +0,0 @@
package org.ruoyi.embedding.impl;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.openai.OpenAiEmbeddingModel;
import dev.langchain4j.model.output.Response;
import org.ruoyi.domain.vo.ChatModelVo;
import org.ruoyi.embedding.BaseEmbedModelService;
import org.ruoyi.embedding.model.ModalityType;
import org.springframework.stereotype.Component;
import java.util.List;
import java.util.Set;
/**
 * @Author: Robust_H
 * @Date: 2025-09-30 3:59 PM
 * @Description: OpenAI embedding model provider (text modality only)
 */
@Component("openai")
public class OpenAiEmbeddingProvider implements BaseEmbedModelService {

    /** Model configuration set through {@link #configure(ChatModelVo)}. */
    protected ChatModelVo chatModelVo;

    /**
     * Stores the model configuration used by {@link #embedAll(List)}.
     *
     * @param config model configuration supplying the API host, API key,
     *               model name and embedding dimension
     */
    @Override
    public void configure(ChatModelVo config) {
        this.chatModelVo = config;
    }

    /**
     * Reports which modalities this provider can embed.
     *
     * @return immutable set containing only the text modality
     */
    @Override
    public Set<ModalityType> getSupportedModalities() {
        Set<ModalityType> supported = Set.of(ModalityType.TEXT);
        return supported;
    }

    /**
     * Embeds all text segments with an OpenAI embedding model built from the
     * stored configuration. A new model instance is built on every call.
     *
     * @param textSegments text segments to embed
     * @return the response returned by the OpenAI embedding model
     */
    @Override
    public Response<List<Embedding>> embedAll(List<TextSegment> textSegments) {
        OpenAiEmbeddingModel model = OpenAiEmbeddingModel.builder()
                .baseUrl(chatModelVo.getApiHost())
                .apiKey(chatModelVo.getApiKey())
                .modelName(chatModelVo.getModelName())
                .dimensions(chatModelVo.getDimension())
                .build();
        return model.embedAll(textSegments);
    }
}

Some files were not shown because too many files have changed in this diff Show More