Merge branch 'main' into main

This commit is contained in:
ageerle
2025-12-12 11:46:19 +08:00
committed by GitHub
535 changed files with 21172 additions and 14160 deletions

View File

@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>

View File

@@ -16,11 +16,12 @@ import java.util.List;
@Slf4j
public class CodeFileLoader implements ResourceLoader {
private final TextSplitter textSplitter;
@Override
public String getContent(InputStream inputStream) {
StringBuffer stringBuffer = new StringBuffer();
try (InputStreamReader reader = new InputStreamReader(inputStream);
BufferedReader bufferedReader = new BufferedReader(reader)){
BufferedReader bufferedReader = new BufferedReader(reader)) {
String line;
while ((line = bufferedReader.readLine()) != null) {
stringBuffer.append(line).append("\n");
@@ -30,8 +31,9 @@ public class CodeFileLoader implements ResourceLoader {
}
return stringBuffer.toString();
}
@Override
public List<String> getChunkList(String content, String kid){
public List<String> getChunkList(String content, String kid) {
return textSplitter.split(content, kid);
}
}

View File

@@ -12,12 +12,14 @@ import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
@Component
@AllArgsConstructor
@Slf4j
public class ExcelFileLoader implements ResourceLoader {
private final TextSplitter textSplitter;
private static final int DEFAULT_BUFFER_SIZE = 8192;
private final TextSplitter textSplitter;
@Override
public String getContent(InputStream inputStream) {
// 使用带缓冲的输入流包装(保持原流不自动关闭)

View File

@@ -3,7 +3,7 @@ package org.ruoyi.chain.loader;
import java.io.InputStream;
import java.util.List;
public class FolderLoader implements ResourceLoader{
public class FolderLoader implements ResourceLoader {
@Override
public String getContent(InputStream inputStream) {
return null;

View File

@@ -16,11 +16,12 @@ import java.util.List;
@Slf4j
public class MarkDownFileLoader implements ResourceLoader {
private final TextSplitter textSplitter;
@Override
public String getContent(InputStream inputStream) {
StringBuffer stringBuffer = new StringBuffer();
try (InputStreamReader reader = new InputStreamReader(inputStream);
BufferedReader bufferedReader = new BufferedReader(reader)){
BufferedReader bufferedReader = new BufferedReader(reader)) {
String line;
while ((line = bufferedReader.readLine()) != null) {
stringBuffer.append(line).append("\n");
@@ -30,8 +31,9 @@ public class MarkDownFileLoader implements ResourceLoader {
}
return stringBuffer.toString();
}
@Override
public List<String> getChunkList(String content, String kid){
public List<String> getChunkList(String content, String kid) {
return textSplitter.split(content, kid);
}
}

View File

@@ -14,6 +14,7 @@ import java.util.List;
@AllArgsConstructor
public class PdfFileLoader implements ResourceLoader {
private final TextSplitter characterTextSplitter;
@Override
public String getContent(InputStream inputStream) {
PDDocument document = null;

View File

@@ -14,7 +14,6 @@ import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.FilenameUtils;
import org.ruoyi.chain.split.TextSplitter;
import org.ruoyi.common.core.utils.StringUtils;
import org.ruoyi.common.core.utils.file.FileUtils;
import org.ruoyi.common.oss.core.OssClient;
import org.ruoyi.common.oss.entity.UploadResult;
@@ -44,11 +43,11 @@ import java.util.regex.Pattern;
@Component
@AllArgsConstructor
public class PdfMinerUFileLoader implements ResourceLoader {
// 预编译正则表达式
private static final Pattern MD_IMAGE_PATTERN = Pattern.compile("!\\[(.*?)]\\((.*?)(\\s*=\\d+)?\\)");
private final TextSplitter characterTextSplitter;
private final PdfProperties properties;
private final SysOssMapper sysOssMapper;
// 预编译正则表达式
private static final Pattern MD_IMAGE_PATTERN = Pattern.compile("!\\[(.*?)]\\((.*?)(\\s*=\\d+)?\\)");
// OCR图片识别线程池
private final ThreadPoolExecutor ocrExecutor = new ThreadPoolExecutor(
// 核心线程数
@@ -63,6 +62,184 @@ public class PdfMinerUFileLoader implements ResourceLoader {
new ThreadPoolExecutor.CallerRunsPolicy()
);
/**
 * Copies the given stream into a newly created temporary PDF file.
 *
 * <p>If the copy fails, the partially written temporary file is removed before the
 * exception propagates, so failed uploads do not pile up in the temp directory.
 *
 * @param is stream holding the PDF bytes; it is read here but not closed
 * @return the temporary file (prefix {@code upload_}, suffix {@code .pdf}); the caller
 *         is responsible for deleting it when done
 * @throws IOException if the temporary file cannot be created or the copy fails
 */
private static File createTempFile(InputStream is) throws IOException {
    File tempFile = File.createTempFile("upload_", ".pdf");
    try {
        Files.copy(is, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
    } catch (IOException | RuntimeException e) {
        // Best-effort cleanup: nobody else knows this file exists if we fail here.
        try {
            Files.deleteIfExists(tempFile.toPath());
        } catch (IOException cleanupFailure) {
            e.addSuppressed(cleanupFailure);
        }
        throw e;
    }
    return tempFile;
}
/**
 * Resolves the directory that receives the conversion output, creating it when absent.
 *
 * <p>On Windows the directory is {@code minerUOutPut} under the user's home directory;
 * on every other platform it is {@code /var/minerUOutPut}.
 *
 * @return the output directory
 * @throws IOException if the directory does not exist and cannot be created
 */
private static Path buildOutputPath() throws IOException {
    Path outputDir;
    if (isWindows()) {
        // Stay inside the user's home directory to avoid permission problems on other drives.
        outputDir = Paths.get(System.getProperty("user.home")).resolve("minerUOutPut");
    } else {
        outputDir = Paths.get("/var/minerUOutPut");
    }
    if (Files.exists(outputDir)) {
        return outputDir;
    }
    Files.createDirectories(outputDir);
    return outputDir;
}
/**
 * Tells whether the JVM is running on Windows, based on the {@code os.name} system property.
 *
 * <p>The name is lower-cased with {@link java.util.Locale#ROOT} so the check does not depend
 * on the default locale (under a Turkish locale an upper-case {@code I} would otherwise
 * become a dotless {@code ı} and the match would fail).
 *
 * @return {@code true} if the lower-cased OS name contains {@code "win"}
 */
private static boolean isWindows() {
    return System.getProperty("os.name").toLowerCase(java.util.Locale.ROOT).contains("win");
}
/**
 * Starts the {@code magic-pdf} conversion inside the given conda environment.
 *
 * <p>On Windows the command runs through {@code cmd /c} after {@code conda activate};
 * elsewhere it runs through {@code bash -c} after sourcing {@code bin/activate} from the
 * environment directory. Standard error is merged into standard output.
 *
 * @param condaEnv   conda environment (name or path on Windows, directory path elsewhere)
 * @param inputFile  PDF file to convert
 * @param outputPath directory that receives the conversion output
 * @return the started process
 * @throws IOException if the process cannot be started
 */
private static Process buildProcess(String condaEnv, File inputFile, Path outputPath) throws IOException {
    String[] command;
    if (isWindows()) {
        command = new String[]{
            "cmd", "/c",
            "call", "conda", "activate",
            condaEnv.replace("\"", ""),
            "&&", "magic-pdf",
            "-p", inputFile.getAbsolutePath(),
            "-o", outputPath.toString()
        };
    } else {
        // Every path is quoted for the shell so that an apostrophe inside a path can
        // neither break the script nor inject additional commands.
        String script = "source " + shellQuote(condaEnv + "/bin/activate")
            + " && magic-pdf -p " + shellQuote(inputFile.getAbsolutePath())
            + " -o " + shellQuote(outputPath.toString());
        command = new String[]{"bash", "-c", script};
    }
    return new ProcessBuilder(command)
        .redirectErrorStream(true)
        .start();
}

/**
 * Wraps a value in single quotes for a POSIX shell, escaping embedded single quotes.
 */
private static String shellQuote(String value) {
    return "'" + value.replace("'", "'\\''") + "'";
}
/**
 * Forwards the process output to the application log on a background thread.
 *
 * <p>Each line read from the process's output stream is logged at INFO level with the
 * {@code [PROCESS LOG]} prefix. The executor is shut down right after the task is
 * submitted: the reader task still runs to completion, and the worker thread ends with
 * it instead of staying alive for every converted file.
 *
 * @param process process whose output stream is drained
 */
private static void logProcessOutput(Process process) {
    var logPump = Executors.newSingleThreadExecutor();
    logPump.submit(() -> {
        try (BufferedReader reader = new BufferedReader(
            new InputStreamReader(process.getInputStream()))) {
            String line;
            while ((line = reader.readLine()) != null) {
                log.info("[PROCESS LOG] {}", line);
            }
        } catch (IOException e) {
            log.warn("Failed to read process output", e);
        }
    });
    // No further tasks are accepted; the submitted task is allowed to finish.
    logPump.shutdown();
}
/**
 * Checks the outcome of a conversion and reports where the Markdown file is.
 *
 * <p>The expected file is {@code <outputPath>/<baseName>/auto/<baseName>.md}, where
 * {@code baseName} is the input file name without its extension.
 *
 * @param inputFile  PDF file that was converted
 * @param outputPath directory the conversion wrote into
 * @param exitCode   exit code of the conversion process
 * @return the path of the Markdown file when the exit code is 0 and the file exists;
 *         otherwise a failure message containing the exit code and the expected path
 */
private static String verifyResult(File inputFile, Path outputPath, int exitCode) {
    String stem = FilenameUtils.removeExtension(inputFile.getName());
    Path markdown = outputPath.resolve(stem).resolve("auto").resolve(stem + ".md");

    boolean converted = exitCode == 0 && Files.exists(markdown);
    if (!converted) {
        return String.format("转换失败(退出码%d| 预期文件:%s", exitCode, markdown);
    }

    String location = markdown.toString();
    log.info("转换成功:{}", location);
    return location;
}
/**
 * Asks a multimodal chat model to summarise the text and visual content of an image.
 *
 * <p>A new {@code OpenAiChatModel} is built on each call with the demo API key, the
 * {@code gpt-4o-mini} model name and the langchain4j demo base URL. The request carries
 * a fixed instruction prompt followed by the image reference.
 *
 * @param imageUrl URL of the image to analyse
 * @return the text of the model's reply
 */
private static String imageUrlOCR(String imageUrl) {
    // Instruction prompt; the lines are joined with '\n' and there is no trailing newline.
    String prompt = String.join("\n",
        "请按以下逻辑处理图片:",
        "1. 文字检测:识别图中所有可见文字(包括水印/标签若无文字则跳至步骤3",
        "2. 文字处理:",
        " a. 对识别到的文字进行❗核心信息提炼",
        " b. ❗禁止直接输出原文内容",
        " c. 描述文字位置(如'顶部居中')、字体特征(颜色/大小)",
        "3. 视觉描述:",
        " a. 若无文字则用❗50字内简洁描述主体对象、场景、色彩搭配与画面氛围",
        " b. 若有文字则补充说明文字与画面的关系",
        "4. 输出规则:",
        " - 最终输出为纯文本,格式:'[文字总结] 视觉描述 关键词xx,xx'",
        " - 关键词从内容中提取3个最具代表性的名词",
        " - 无文字时格式:'[空] 简洁描述 关键词xx,xx'");

    OpenAiChatModel chatModel = OpenAiChatModel.builder()
        .apiKey("demo")
        .modelName("gpt-4o-mini")
        .baseUrl("http://langchain4j.dev/demo/openai/v1")
        .build();

    UserMessage request = UserMessage.from(TextContent.from(prompt), ImageContent.from(imageUrl));
    ChatResponse response = chatModel.chat(request);
    return response.aiMessage().text();
}
/**
 * Deletes the output directory and everything below it.
 *
 * <p>Paths are deleted in reverse order so that the contents of a directory are removed
 * before the directory itself. Failures are logged and never thrown: a path that cannot
 * be deleted is skipped and the remaining paths are still processed.
 *
 * @param outputPath directory to remove; nothing happens if it does not exist
 */
private static void cleanOutputDirectory(Path outputPath) {
    if (!Files.exists(outputPath)) {
        return;
    }
    // Files.walk holds open directory handles until the stream is closed.
    try (var paths = Files.walk(outputPath)) {
        paths
            // Reverse order: children first, parents last.
            .sorted((p1, p2) -> p2.compareTo(p1))
            .forEach(path -> {
                try {
                    Files.delete(path);
                } catch (IOException e) {
                    log.warn("清理输出目录失败: {}", path, e);
                }
            });
    } catch (IOException e) {
        log.error("遍历输出目录失败", e);
    }
}
@Override
public String getContent(InputStream inputStream) {
String content = "";
@@ -127,127 +304,6 @@ public class PdfMinerUFileLoader implements ResourceLoader {
return characterTextSplitter.split(content, kid);
}
/**
* 创建临时PDF文件
*
* @param is 输入流
* @return
* @throws IOException
*/
private static File createTempFile(InputStream is) throws IOException {
File tempFile = File.createTempFile("upload_", ".pdf");
Files.copy(is, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
return tempFile;
}
/**
* 构建跨平台文件输出路径
*
* @return
* @throws IOException
*/
private static Path buildOutputPath() throws IOException {
Path basePath = isWindows() ?
// Windows C盘用户路径下 minerUOutPut避免其他盘符权限问题
Paths.get(System.getProperty("user.home")).resolve("minerUOutPut") :
Paths.get("/var/minerUOutPut");
if (!Files.exists(basePath)) {
Files.createDirectories(basePath);
}
return basePath;
}
/**
* 判断当前操作系统是否为Windows
*
* @return
*/
private static boolean isWindows() {
return System.getProperty("os.name").toLowerCase().contains("win");
}
/**
* 执行命令
*
* @param condaEnv conda环境路径
* @param inputFile 输入文件
* @param outputPath 输出路径
* @return
* @throws IOException
*/
private static Process buildProcess(String condaEnv, File inputFile, Path outputPath) throws IOException {
ProcessBuilder pb = new ProcessBuilder();
String[] command;
if (isWindows()) {
command = new String[]{
"cmd", "/c",
"call", "conda", "activate",
condaEnv.replace("\"", ""),
"&&", "magic-pdf",
"-p", inputFile.getAbsolutePath(),
"-o", outputPath.toString()
};
} else {
command = new String[]{
"bash", "-c",
String.format("source '%s/bin/activate' && magic-pdf -p '%s' -o '%s'",
condaEnv,
inputFile.getAbsolutePath(),
outputPath.toString())
};
}
return pb.command(command)
.redirectErrorStream(true)
.start();
}
/**
* 实时日志输出
*
* @param process 进程
*/
private static void logProcessOutput(Process process) {
Executors.newSingleThreadExecutor().submit(() -> {
try (BufferedReader reader = new BufferedReader(
new InputStreamReader(process.getInputStream()))) {
String line;
while ((line = reader.readLine()) != null) {
log.info("[PROCESS LOG] " + line);
}
} catch (IOException e) {
e.printStackTrace();
}
});
}
/**
* 验证转换结果
*
* @param inputFile 输入文件
* @param outputPath 输出路径
* @param exitCode 退出码
* @return
*/
private static String verifyResult(File inputFile, Path outputPath, int exitCode) {
String baseName = FilenameUtils.removeExtension(inputFile.getName());
Path expectedMd = outputPath
.resolve(baseName)
.resolve("auto")
.resolve(baseName + ".md");
if (exitCode == 0 && Files.exists(expectedMd)) {
log.info("转换成功:{}", expectedMd.toString());
return expectedMd.toString();
}
return String.format("转换失败(退出码%d| 预期文件:%s", exitCode, expectedMd);
}
/**
* 正则匹配图片语法,多线程进行处理
*
@@ -300,7 +356,6 @@ public class PdfMinerUFileLoader implements ResourceLoader {
return sb;
}
/**
* 图片处理任务
*
@@ -375,43 +430,6 @@ public class PdfMinerUFileLoader implements ResourceLoader {
}
}
/**
* 多模态OCR识别图片内容
*
* @param imageUrl 图片URL
* @return
*/
private static String imageUrlOCR(String imageUrl) {
OpenAiChatModel model = OpenAiChatModel.builder()
.apiKey("demo")
.modelName("gpt-4o-mini")
.baseUrl("http://langchain4j.dev/demo/openai/v1")
.build();
UserMessage userMessage = UserMessage.from(
TextContent.from(
"请按以下逻辑处理图片:\n" +
"1. 文字检测:识别图中所有可见文字(包括水印/标签若无文字则跳至步骤3\n" +
"2. 文字处理:\n" +
" a. 对识别到的文字进行❗核心信息提炼\n" +
" b. ❗禁止直接输出原文内容\n" +
" c. 描述文字位置(如'顶部居中')、字体特征(颜色/大小)\n" +
"3. 视觉描述:\n" +
" a. 若无文字则用❗50字内简洁描述主体对象、场景、色彩搭配与画面氛围\n" +
" b. 若有文字则补充说明文字与画面的关系\n" +
"4. 输出规则:\n" +
" - 最终输出为纯文本,格式:'[文字总结] 视觉描述 关键词xx,xx'\n" +
" - 关键词从内容中提取3个最具代表性的名词\n" +
" - 无文字时格式:'[空] 简洁描述 关键词xx,xx'"
),
ImageContent.from(imageUrl)
);
ChatResponse chat = model.chat(userMessage);
AiMessage answer = chat.aiMessage();
return answer.text();
}
/**
* 静态内部类保存图片匹配信息
*/
@@ -421,29 +439,4 @@ public class PdfMinerUFileLoader implements ResourceLoader {
int start; // 匹配起始位置
int end; // 匹配结束位置
}
/**
* 清理输出目录
*
* @param outputPath 输出目录
*/
private static void cleanOutputDirectory(Path outputPath) {
if (Files.exists(outputPath)) {
try {
Files.walk(outputPath)
// 按逆序删除(子目录先删)
.sorted((p1, p2) -> -p1.compareTo(p2))
.forEach(path -> {
try {
Files.delete(path);
} catch (IOException e) {
log.warn("清理输出目录失败: {}", path, e);
}
});
} catch (IOException e) {
log.error("遍历输出目录失败", e);
}
}
}
}

View File

@@ -2,11 +2,7 @@ package org.ruoyi.chain.loader;
import lombok.AllArgsConstructor;
import org.ruoyi.chain.split.*;
import org.ruoyi.config.properties.PdfProperties;
import org.ruoyi.chain.split.CharacterTextSplitter;
import org.ruoyi.chain.split.CodeTextSplitter;
import org.ruoyi.chain.split.MarkdownTextSplitter;
import org.ruoyi.constant.FileType;
import org.ruoyi.system.mapper.SysOssMapper;
import org.springframework.stereotype.Component;
@@ -23,22 +19,22 @@ public class ResourceLoaderFactory {
private final SysOssMapper sysOssMapper;
public ResourceLoader getLoaderByFileType(String fileType){
if (FileType.isTextFile(fileType)){
public ResourceLoader getLoaderByFileType(String fileType) {
if (FileType.isTextFile(fileType)) {
return new TextFileLoader(characterTextSplitter);
} else if (FileType.isWord(fileType)) {
return new WordLoader(characterTextSplitter);
} else if (FileType.isPdf(fileType) && pdfProperties.getTransition().isEnableMinerU()) {
return new PdfMinerUFileLoader(characterTextSplitter,pdfProperties,sysOssMapper);
return new PdfMinerUFileLoader(characterTextSplitter, pdfProperties, sysOssMapper);
} else if (FileType.isPdf(fileType)) {
return new PdfFileLoader(characterTextSplitter);
}else if (FileType.isMdFile(fileType)) {
} else if (FileType.isMdFile(fileType)) {
return new MarkDownFileLoader(markdownTextSplitter);
}else if (FileType.isCodeFile(fileType)) {
} else if (FileType.isCodeFile(fileType)) {
return new CodeFileLoader(codeTextSplitter);
} else if (FileType.isExcel(fileType)) {
return new ExcelFileLoader(excelTextSplitter);
}else {
} else {
return new TextFileLoader(characterTextSplitter);
}
}

View File

@@ -9,29 +9,30 @@ import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.List;
import java.util.stream.Collectors;
@Component
@AllArgsConstructor
@Slf4j
public class TextFileLoader implements ResourceLoader{
public class TextFileLoader implements ResourceLoader {
private final TextSplitter textSplitter;
@Override
public String getContent(InputStream inputStream) {
StringBuffer stringBuffer = new StringBuffer();
try (InputStreamReader reader = new InputStreamReader(inputStream, "UTF-8");
BufferedReader bufferedReader = new BufferedReader(reader)){
String line;
while ((line = bufferedReader.readLine()) != null) {
stringBuffer.append(line).append("\n");
}
String stringBuffer = "";
try (InputStreamReader reader = new InputStreamReader(inputStream, StandardCharsets.UTF_8);
BufferedReader bufferedReader = new BufferedReader(reader)) {
stringBuffer = bufferedReader.lines().collect(Collectors.joining());
} catch (IOException e) {
e.printStackTrace();
}
return stringBuffer.toString();
return stringBuffer;
}
@Override
public List<String> getChunkList(String content, String kid){
public List<String> getChunkList(String content, String kid) {
return textSplitter.split(content, kid);
}
}

View File

@@ -16,6 +16,7 @@ import java.util.List;
@Slf4j
public class WordLoader implements ResourceLoader {
private final TextSplitter textSplitter;
@Override
public String getContent(InputStream inputStream) {
XWPFDocument document = null;

View File

@@ -1,12 +1,8 @@
package org.ruoyi.chain.split;
import jakarta.annotation.Resource;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.ruoyi.common.core.utils.StringUtils;
import org.ruoyi.domain.vo.KnowledgeInfoVo;
import org.ruoyi.service.IKnowledgeInfoService;
import org.springframework.context.annotation.Lazy;
import org.springframework.stereotype.Component;
import java.util.ArrayList;
@@ -16,7 +12,7 @@ import java.util.List;
@Component
@AllArgsConstructor
@Slf4j
public class ExcelTextSplitter implements TextSplitter{
public class ExcelTextSplitter implements TextSplitter {
@Override

View File

@@ -9,7 +9,7 @@ import java.util.List;
@Component
@AllArgsConstructor
@Slf4j
public class MarkdownTextSplitter implements TextSplitter{
public class MarkdownTextSplitter implements TextSplitter {
@Override
public List<String> split(String content, String kid) {
return null;

View File

@@ -39,62 +39,56 @@ public class FileType {
public static final String YAML = "yaml";
public static final String YML = "yml";
public static boolean isTextFile(String type){
public static boolean isTextFile(String type) {
if (type.equalsIgnoreCase(TXT) || type.equalsIgnoreCase(CSV) || type.equalsIgnoreCase(PROPERTIES)
|| type.equalsIgnoreCase(INI) || type.equalsIgnoreCase(YAML) || type.equalsIgnoreCase(YML)
|| type.equalsIgnoreCase(LOG) || type.equalsIgnoreCase(XML)){
|| type.equalsIgnoreCase(LOG) || type.equalsIgnoreCase(XML)) {
return true;
}
else {
} else {
return false;
}
}
public static boolean isCodeFile(String type){
public static boolean isCodeFile(String type) {
if (type.equalsIgnoreCase(JAVA) || type.equalsIgnoreCase(HTML) || type.equalsIgnoreCase(HTM) || type.equalsIgnoreCase(JS) || type.equalsIgnoreCase(PY)
|| type.equalsIgnoreCase(CPP) || type.equalsIgnoreCase(SQL) || type.equalsIgnoreCase(PHP) || type.equalsIgnoreCase(RUBY)
|| type.equalsIgnoreCase(C) || type.equalsIgnoreCase(H) || type.equalsIgnoreCase(HPP) || type.equalsIgnoreCase(SWIFT)
|| type.equalsIgnoreCase(TS) || type.equalsIgnoreCase(RUST) || type.equalsIgnoreCase(PERL) || type.equalsIgnoreCase(SHELL)
|| type.equalsIgnoreCase(BAT) || type.equalsIgnoreCase(CMD) || type.equalsIgnoreCase(CSS)){
|| type.equalsIgnoreCase(BAT) || type.equalsIgnoreCase(CMD) || type.equalsIgnoreCase(CSS)) {
return true;
}
else {
} else {
return false;
}
}
public static boolean isMdFile(String type){
if (type.equalsIgnoreCase(MD)){
public static boolean isMdFile(String type) {
if (type.equalsIgnoreCase(MD)) {
return true;
}
else {
} else {
return false;
}
}
public static boolean isWord(String type){
if (type.equalsIgnoreCase(DOC) || type.equalsIgnoreCase(DOCX)){
public static boolean isWord(String type) {
if (type.equalsIgnoreCase(DOC) || type.equalsIgnoreCase(DOCX)) {
return true;
}
else {
} else {
return false;
}
}
public static boolean isPdf(String type){
if (type.equalsIgnoreCase(PDF)){
public static boolean isPdf(String type) {
if (type.equalsIgnoreCase(PDF)) {
return true;
}
else {
} else {
return false;
}
}
public static boolean isExcel(String type){
if (type.equalsIgnoreCase(XLS) || type.equalsIgnoreCase(XLSX)){
public static boolean isExcel(String type) {
if (type.equalsIgnoreCase(XLS) || type.equalsIgnoreCase(XLSX)) {
return true;
}
else {
} else {
return false;
}
}

View File

@@ -19,66 +19,65 @@ import java.io.Serial;
@TableName("knowledge_attach")
public class KnowledgeAttach extends BaseEntity {
@Serial
private static final long serialVersionUID = 1L;
@Serial
private static final long serialVersionUID = 1L;
/**
*
*/
@TableId(value = "id")
private Long id;
/**
*
*/
@TableId(value = "id")
private Long id;
/**
* 知识库ID
*/
private String kid;
/**
* 知识库ID
*/
private String kid;
/**
* 文档ID
*/
private String docId;
/**
* 文档ID
*/
private String docId;
/**
* 文档名称
*/
private String docName;
/**
* 文档名称
*/
private String docName;
/**
* 文档类型
*/
private String docType;
/**
* 文档类型
*/
private String docType;
/**
* 文档内容
*/
private String content;
/**
* 文档内容
*/
private String content;
/**
* 备注
*/
private String remark;
/**
* 备注
*/
private String remark;
/**
* 对象存储主键
*/
private Long ossId;
/**
* 对象存储主键
*/
private Long ossId;
/**
* 拆解图片状态10未开始20进行中30已完成
*/
private Integer picStatus;
/**
* 拆解图片状态10未开始20进行中30已完成
*/
private Integer picStatus;
/**
* 分析图片状态10未开始20进行中30已完成
*/
private Integer picAnysStatus;
/**
* 分析图片状态10未开始20进行中30已完成
*/
private Integer picAnysStatus;
/**
* 写入向量数据库状态10未开始20进行中30已完成
*/
private Integer vectorStatus;
/**
* 写入向量数据库状态10未开始20进行中30已完成
*/
private Integer vectorStatus;
}

View File

@@ -21,72 +21,72 @@ import org.ruoyi.domain.KnowledgeAttach;
@AutoMapper(target = KnowledgeAttach.class, reverseConvertGenerate = false)
public class KnowledgeAttachBo extends BaseEntity {
/**
*
*/
@NotNull(message = "不能为空", groups = {EditGroup.class})
private Long id;
/**
*
*/
@NotNull(message = "不能为空", groups = {EditGroup.class})
private Long id;
/**
* 知识库ID
*/
@NotBlank(message = "知识库ID不能为空", groups = {AddGroup.class, EditGroup.class})
private String kid;
/**
* 知识库ID
*/
@NotBlank(message = "知识库ID不能为空", groups = {AddGroup.class, EditGroup.class})
private String kid;
/**
* 文档ID
*/
@NotBlank(message = "文档ID不能为空", groups = {AddGroup.class, EditGroup.class})
private String docId;
/**
* 文档ID
*/
@NotBlank(message = "文档ID不能为空", groups = {AddGroup.class, EditGroup.class})
private String docId;
/**
* 文档名称
*/
@NotBlank(message = "文档名称不能为空", groups = {AddGroup.class, EditGroup.class})
private String docName;
/**
* 文档名称
*/
@NotBlank(message = "文档名称不能为空", groups = {AddGroup.class, EditGroup.class})
private String docName;
/**
* 文档类型
*/
@NotBlank(message = "文档类型不能为空", groups = {AddGroup.class, EditGroup.class})
private String docType;
/**
* 文档类型
*/
@NotBlank(message = "文档类型不能为空", groups = {AddGroup.class, EditGroup.class})
private String docType;
/**
* 文档内容
*/
@NotBlank(message = "文档内容不能为空", groups = {AddGroup.class, EditGroup.class})
private String content;
/**
* 文档内容
*/
@NotBlank(message = "文档内容不能为空", groups = {AddGroup.class, EditGroup.class})
private String content;
/**
* 备注
*/
@NotBlank(message = "备注不能为空", groups = {AddGroup.class, EditGroup.class})
private String remark;
/**
* 备注
*/
@NotBlank(message = "备注不能为空", groups = {AddGroup.class, EditGroup.class})
private String remark;
/**
* 对象存储主键
*/
@NotNull(message = "对象存储主键不能为空", groups = {AddGroup.class, EditGroup.class})
private Long ossId;
/**
* 对象存储主键
*/
@NotNull(message = "对象存储主键不能为空", groups = {AddGroup.class, EditGroup.class})
private Long ossId;
/**
* 拆解图片状态10未开始20进行中30已完成
*/
@NotNull(message = "拆解图片状态10未开始20进行中30已完成不能为空", groups = { AddGroup.class, EditGroup.class })
private Integer picStatus;
/**
* 拆解图片状态10未开始20进行中30已完成
*/
@NotNull(message = "拆解图片状态10未开始20进行中30已完成不能为空", groups = {AddGroup.class, EditGroup.class})
private Integer picStatus;
/**
* 分析图片状态10未开始20进行中30已完成
*/
@NotNull(message = "分析图片状态10未开始20进行中30已完成不能为空", groups = { AddGroup.class, EditGroup.class })
private Integer picAnysStatus;
/**
* 分析图片状态10未开始20进行中30已完成
*/
@NotNull(message = "分析图片状态10未开始20进行中30已完成不能为空", groups = {AddGroup.class, EditGroup.class})
private Integer picAnysStatus;
/**
* 写入向量数据库状态10未开始20进行中30已完成
*/
@NotNull(message = "写入向量数据库状态10未开始20进行中30已完成不能为空", groups = { AddGroup.class, EditGroup.class })
private Integer vectorStatus;
/**
* 写入向量数据库状态10未开始20进行中30已完成
*/
@NotNull(message = "写入向量数据库状态10未开始20进行中30已完成不能为空", groups = {AddGroup.class, EditGroup.class})
private Integer vectorStatus;
}

View File

@@ -24,43 +24,43 @@ public class KnowledgeFragmentBo extends BaseEntity {
/**
*
*/
@NotNull(message = "不能为空", groups = { EditGroup.class })
@NotNull(message = "不能为空", groups = {EditGroup.class})
private Long id;
/**
* 知识库ID
*/
@NotBlank(message = "知识库ID不能为空", groups = { AddGroup.class, EditGroup.class })
@NotBlank(message = "知识库ID不能为空", groups = {AddGroup.class, EditGroup.class})
private String kid;
/**
* 文档ID
*/
@NotBlank(message = "文档ID不能为空", groups = { AddGroup.class, EditGroup.class })
@NotBlank(message = "文档ID不能为空", groups = {AddGroup.class, EditGroup.class})
private String docId;
/**
* 知识片段ID
*/
@NotBlank(message = "知识片段ID不能为空", groups = { AddGroup.class, EditGroup.class })
@NotBlank(message = "知识片段ID不能为空", groups = {AddGroup.class, EditGroup.class})
private String fid;
/**
* 片段索引下标
*/
@NotNull(message = "片段索引下标不能为空", groups = { AddGroup.class, EditGroup.class })
@NotNull(message = "片段索引下标不能为空", groups = {AddGroup.class, EditGroup.class})
private Long idx;
/**
* 文档内容
*/
@NotBlank(message = "文档内容不能为空", groups = { AddGroup.class, EditGroup.class })
@NotBlank(message = "文档内容不能为空", groups = {AddGroup.class, EditGroup.class})
private String content;
/**
* 备注
*/
@NotBlank(message = "备注不能为空", groups = { AddGroup.class, EditGroup.class })
@NotBlank(message = "备注不能为空", groups = {AddGroup.class, EditGroup.class})
private String remark;

View File

@@ -22,33 +22,33 @@ import org.ruoyi.domain.KnowledgeInfo;
public class KnowledgeInfoBo extends BaseEntity {
/**
* 主键
* 主键
*/
@NotNull(message = "不能为空", groups = { EditGroup.class })
@NotNull(message = "不能为空", groups = {EditGroup.class})
private Long id;
/**
* 知识库ID
*/
@NotBlank(message = "知识库ID不能为空", groups = {EditGroup.class })
@NotBlank(message = "知识库ID不能为空", groups = {EditGroup.class})
private String kid;
/**
* 用户ID
*/
@NotNull(message = "用户ID不能为空", groups = {EditGroup.class })
@NotNull(message = "用户ID不能为空", groups = {EditGroup.class})
private Long uid;
/**
* 知识库名称
*/
@NotBlank(message = "知识库名称不能为空", groups = { AddGroup.class, EditGroup.class })
@NotBlank(message = "知识库名称不能为空", groups = {AddGroup.class, EditGroup.class})
private String kname;
/**
* 是否公开知识库0 否 1是
*/
@NotNull(message = "是否公开知识库0 否 1是不能为空", groups = { AddGroup.class, EditGroup.class })
@NotNull(message = "是否公开知识库0 否 1是不能为空", groups = {AddGroup.class, EditGroup.class})
private Integer share;
/**
@@ -74,19 +74,19 @@ public class KnowledgeInfoBo extends BaseEntity {
/**
* 知识库中检索的条数
*/
@NotNull(message = "知识库中检索的条数不能为空", groups = { AddGroup.class, EditGroup.class })
@NotNull(message = "知识库中检索的条数不能为空", groups = {AddGroup.class, EditGroup.class})
private Long retrieveLimit;
/**
* 文本块大小
*/
@NotNull(message = "文本块大小不能为空", groups = { AddGroup.class, EditGroup.class })
@NotNull(message = "文本块大小不能为空", groups = {AddGroup.class, EditGroup.class})
private Long textBlockSize;
/**
* 向量库模型名称
*/
@NotBlank(message = "向量库不能为空", groups = { AddGroup.class, EditGroup.class })
@NotBlank(message = "向量库不能为空", groups = {AddGroup.class, EditGroup.class})
private String vectorModelName;
/**

View File

@@ -5,6 +5,7 @@ import lombok.Data;
/**
* 查询向量所需参数
*
* @author ageer
*/
@Data

View File

@@ -6,6 +6,7 @@ import java.util.List;
/**
* 保存向量所需参数
*
* @author ageer
*/
@Data

View File

@@ -21,74 +21,73 @@ import java.io.Serializable;
@AutoMapper(target = KnowledgeAttach.class)
public class KnowledgeAttachVo implements Serializable {
@Serial
private static final long serialVersionUID = 1L;
@Serial
private static final long serialVersionUID = 1L;
/**
*
*/
@ExcelProperty(value = "")
private Long id;
/**
*
*/
@ExcelProperty(value = "")
private Long id;
/**
* 知识库ID
*/
@ExcelProperty(value = "知识库ID")
private String kid;
/**
* 知识库ID
*/
@ExcelProperty(value = "知识库ID")
private String kid;
/**
* 文档ID
*/
@ExcelProperty(value = "文档ID")
private String docId;
/**
* 文档ID
*/
@ExcelProperty(value = "文档ID")
private String docId;
/**
* 文档名称
*/
@ExcelProperty(value = "文档名称")
private String docName;
/**
* 文档名称
*/
@ExcelProperty(value = "文档名称")
private String docName;
/**
* 文档类型
*/
@ExcelProperty(value = "文档类型")
private String docType;
/**
* 文档类型
*/
@ExcelProperty(value = "文档类型")
private String docType;
/**
* 文档内容
*/
@ExcelProperty(value = "文档内容")
private String content;
/**
* 文档内容
*/
@ExcelProperty(value = "文档内容")
private String content;
/**
* 备注
*/
@ExcelProperty(value = "备注")
private String remark;
/**
* 备注
*/
@ExcelProperty(value = "备注")
private String remark;
/**
* 对象存储主键
*/
@ExcelProperty(value = "对象存储主键")
private Long ossId;
/**
* 对象存储主键
*/
@ExcelProperty(value = "对象存储主键")
private Long ossId;
/**
* 拆解图片状态10未开始20进行中30已完成
*/
@ExcelProperty(value = "拆解图片状态10未开始20进行中30已完成")
private Integer picStatus;
/**
* 拆解图片状态10未开始20进行中30已完成
*/
@ExcelProperty(value = "拆解图片状态10未开始20进行中30已完成")
private Integer picStatus;
/**
* 分析图片状态10未开始20进行中30已完成
*/
@ExcelProperty(value = "分析图片状态10未开始20进行中30已完成")
private Integer picAnysStatus;
/**
* 分析图片状态10未开始20进行中30已完成
*/
@ExcelProperty(value = "分析图片状态10未开始20进行中30已完成")
private Integer picAnysStatus;
/**
* 写入向量数据库状态10未开始20进行中30已完成
*/
@ExcelProperty(value = "写入向量数据库状态10未开始20进行中30已完成")
private Integer vectorStatus;
/**
* 写入向量数据库状态10未开始20进行中30已完成
*/
@ExcelProperty(value = "写入向量数据库状态10未开始20进行中30已完成")
private Integer vectorStatus;
}

View File

@@ -11,8 +11,6 @@ import java.io.Serial;
import java.io.Serializable;
/**
* 知识片段视图对象 knowledge_fragment
*

View File

@@ -13,8 +13,6 @@ import java.io.Serial;
import java.io.Serializable;
/**
* 知识库视图对象 knowledge_info
*

View File

@@ -13,12 +13,14 @@ import java.util.Set;
public interface BaseEmbedModelService extends EmbeddingModel {
/**
* 根据配置信息配置嵌入模型
*
* @param config 包含模型配置信息的 ChatModelVo 对象
*/
void configure(ChatModelVo config);
/**
* 获取当前嵌入模型支持的所有模态类型
*
* @return 返回支持的模态类型集合
*/
Set<ModalityType> getSupportedModalities();

View File

@@ -34,7 +34,7 @@ public class EmbeddingModelFactory {
* 如果模型已存在于缓存中,则直接返回;否则创建新的实例
*
* @param embeddingModelName 嵌入模型名称
* @param dimension 模型维度大小
* @param dimension 模型维度大小
*/
public BaseEmbedModelService createModel(String embeddingModelName, Integer dimension) {
return modelCache.computeIfAbsent(embeddingModelName, name -> {
@@ -81,7 +81,7 @@ public class EmbeddingModelFactory {
* @param embeddingModelId 嵌入模型的唯一标识ID
*/
public void refreshModel(Long embeddingModelId) {
// 从模型缓存中移除指定ID的模型
// 从模型缓存中移除指定ID的模型
modelCache.remove(embeddingModelId);
}
@@ -100,7 +100,7 @@ public class EmbeddingModelFactory {
* 根据提供的工厂名称和配置信息创建并配置模型实例
*
* @param factory 工厂名称,用于标识模型类型
* @param config 模型配置信息
* @param config 模型配置信息
* @return BaseEmbedModelService 配置好的模型实例
* @throws IllegalArgumentException 当无法获取指定的模型实例时抛出
*/

View File

@@ -12,6 +12,7 @@ import org.ruoyi.embedding.model.MultiModalInput;
public interface MultiModalEmbedModelService extends BaseEmbedModelService {
/**
* 将图像数据转换为嵌入向量
*
* @param imageDataUrl 图像的地址必须是公开可访问的URL
* @return 包含嵌入向量的响应对象,可能包含状态信息和嵌入结果
*/
@@ -19,6 +20,7 @@ public interface MultiModalEmbedModelService extends BaseEmbedModelService {
/**
* 将视频数据转换为嵌入向量
*
* @param videoDataUrl 视频的地址必须是公开可访问的URL
* @return 包含嵌入向量的响应对象,可能包含状态信息和嵌入结果
*/

View File

@@ -1,14 +1,50 @@
package org.ruoyi.embedding.impl;
import dev.langchain4j.community.model.dashscope.QwenEmbeddingModel;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.output.Response;
import org.ruoyi.domain.vo.ChatModelVo;
import org.ruoyi.embedding.model.ModalityType;
import org.springframework.stereotype.Component;
import java.util.List;
import java.util.Set;
/**
* @Author: Robust_H
* @Date: 2025-09-30-下午3:00
* @Description: 阿里百炼基础嵌入模型兼容openai
*/
@Component("alibailian")
public class AliBaiLianBaseEmbedProvider extends OpenAiEmbeddingProvider{
public class AliBaiLianBaseEmbedProvider extends OpenAiEmbeddingProvider {
private ChatModelVo chatModelVo;
/**
 * Stores the model configuration; {@code embedAll} reads it when it builds the model.
 *
 * @param config model configuration to use for subsequent embedding calls
 */
@Override
public void configure(ChatModelVo config) {
    this.chatModelVo = config;
}
/**
 * Reports the modalities supported by this provider.
 *
 * @return an empty, immutable set
 */
// NOTE(review): an empty set is returned although embedAll handles text segments —
// confirm whether a text modality should be reported here.
@Override
public Set<ModalityType> getSupportedModalities() {
    return Set.of();
}
/**
 * Embeds the given text segments with a {@code QwenEmbeddingModel}.
 *
 * <p>A new model is built on every call from the configuration stored by
 * {@code configure}: the API key and model name come from that configuration, while the
 * dimension is currently fixed at 1024 and no base URL is set.
 *
 * @param textSegments text segments to embed
 * @return the response returned by the model's {@code embedAll}
 */
@Override
public Response<List<Embedding>> embedAll(List<TextSegment> textSegments) {
    return QwenEmbeddingModel.builder()
        // TODO: temporary test setup, to be changed later
        // .baseUrl(chatModelVo.getApiHost())
        .apiKey(chatModelVo.getApiKey())
        .modelName(chatModelVo.getModelName())
        // Fixed dimension for now; the configured value below is disabled.
        .dimension(1024)
        // .dimension(chatModelVo.getDimension())
        .build()
        .embedAll(textSegments);
}
}

View File

@@ -26,9 +26,8 @@ import java.util.concurrent.TimeUnit;
@Component("bailianMultiModel")
@Slf4j
public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelService {
private ChatModelVo chatModelVo;
private final OkHttpClient okHttpClient;
private ChatModelVo chatModelVo;
/**
* 构造函数初始化HTTP客户端
@@ -44,6 +43,7 @@ public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelSer
/**
* 图像嵌入向量生成
*
* @param imageDataUrl 图像数据的URL
* @return 包含图像嵌入向量的Response对象
*/
@@ -54,6 +54,7 @@ public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelSer
/**
* 视频嵌入向量生成
*
* @param videoDataUrl 视频数据的URL
* @return 包含视频嵌入向量的Response对象
*/
@@ -65,6 +66,7 @@ public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelSer
/**
* 多模态嵌入向量生成
* 支持同时处理文本、图像和视频等多种模态的数据
*
* @param input 包含多种模态输入的对象
* @return 包含多模态嵌入向量的Response对象
*/
@@ -101,6 +103,7 @@ public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelSer
/**
* 配置模型参数
*
* @param config 模型配置信息
*/
@Override
@@ -110,6 +113,7 @@ public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelSer
/**
* 获取支持的模态类型
*
* @return 支持的模态类型集合
*/
@Override
@@ -119,6 +123,7 @@ public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelSer
/**
* 批量文本嵌入向量生成
*
* @param textSegments 文本段列表
* @return 包含所有文本嵌入向量的Response对象
*/
@@ -144,7 +149,8 @@ public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelSer
/**
* 单模态嵌入(图片/视频/单条文本)复用方法
* @param key 模态类型image/video/text
*
* @param key 模态类型image/video/text
* @param dataUrl 数据URL
* @return 包含嵌入向量的Response对象
*/
@@ -171,7 +177,8 @@ public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelSer
/**
* 构建请求对象
* @param contents 请求内容列表
*
* @param contents 请求内容列表
* @param chatModelVo 模型配置信息
* @return 构建好的请求对象
*/
@@ -182,7 +189,8 @@ public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelSer
/**
* 执行 HTTP 请求并解析响应
* @param request 请求对象
*
* @param request 请求对象
* @param chatModelVo 模型配置信息
* @return API响应对象
* @throws IOException IO异常
@@ -212,6 +220,7 @@ public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelSer
/**
* 解析嵌入向量列表
*
* @param responseBody API响应的JSON字符串
* @return 嵌入向量响应对象
* @throws IOException IO异常
@@ -223,6 +232,7 @@ public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelSer
/**
* 构建 API 请求内容 Map
*
* @param input 多模态输入对象
* @return 包含各种模态内容的Map列表
*/
@@ -247,6 +257,7 @@ public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelSer
/**
* 将 API 原始响应解析为 LangChain4j 的 Response<Embedding>
*
* @param resp API原始响应对象
* @return 包含嵌入向量和token使用情况的Response对象
*/
@@ -272,7 +283,7 @@ public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelSer
tokenUsage = new TokenUsage(
resp.usage().input_tokens(),
resp.usage().image_tokens(),
resp.usage().input_tokens() +resp.usage().image_tokens()
resp.usage().input_tokens() + resp.usage().image_tokens()
);
}

View File

@@ -25,6 +25,7 @@ public class OllamaEmbeddingProvider implements BaseEmbedModelService {
public void configure(ChatModelVo config) {
this.chatModelVo = config;
}
@Override
public Set<ModalityType> getSupportedModalities() {
return Set.of(ModalityType.TEXT);

View File

@@ -1,12 +1,8 @@
package org.ruoyi.embedding.impl;
import org.ruoyi.embedding.BaseEmbedModelService;
import org.ruoyi.embedding.model.ModalityType;
import org.springframework.stereotype.Component;
import java.util.Set;
/**
* @Author: Robust_H
* @Date: 2025-09-30-下午3:59

View File

@@ -1,7 +1,7 @@
package org.ruoyi.embedding.model;
import org.ruoyi.common.json.utils.JsonUtils;
import lombok.Data;
import org.ruoyi.common.json.utils.JsonUtils;
import java.util.List;
import java.util.Map;
@@ -16,14 +16,6 @@ public class AliyunMultiModalEmbedRequest {
private String model;
private Input input;
/**
* 表示输入数据的记录类(Record)
* 该类用于封装一个包含多个映射关系列表的输入数据结构
*
* @param contents 包含多个Map的列表每个Map中存储String类型的键和Object类型的值
*/
public record Input(List<Map<String, Object>> contents) { }
/**
* 创建请求对象
*/
@@ -41,4 +33,13 @@ public class AliyunMultiModalEmbedRequest {
public String toJson() {
return JsonUtils.toJsonString(this);
}
/**
 * Input payload of the request, modelled as a record.
 * Wraps the list of content entries carried by the request.
 *
 * @param contents list of maps, each holding String keys and Object values
 */
public record Input(List<Map<String, Object>> contents) {
}
}

View File

@@ -18,6 +18,7 @@ public interface KnowledgeInfoMapper extends BaseMapperPlus<KnowledgeInfo, Knowl
/**
* 根据kid查询知识库
*
* @param kid 知识库id
* @return KnowledgeInfo
*/

View File

@@ -56,7 +56,7 @@ public interface IKnowledgeAttachService {
/**
 * Translates a file.
 *
 * @param file           the file to translate
 * @param targetLanguage the target language
 */
String translationByFile(MultipartFile file, String targetLanguage);

View File

@@ -8,6 +8,7 @@ import java.util.List;
/**
* 向量库管理
*
* @author ageer
*/
public interface VectorStoreService {
@@ -18,7 +19,7 @@ public interface VectorStoreService {
void createSchema(String kid, String embeddingModelName);
void removeById(String id,String modelName) throws ServiceException;
void removeById(String id, String modelName) throws ServiceException;
void removeByDocId(String docId, String kid) throws ServiceException;

View File

@@ -32,101 +32,100 @@ import java.util.Map;
@Service
public class KnowledgeAttachServiceImpl implements IKnowledgeAttachService {
private final KnowledgeAttachMapper baseMapper;
private final KnowledgeFragmentMapper fragmentMapper;
private final KnowledgeAttachMapper baseMapper;
private final KnowledgeFragmentMapper fragmentMapper;
/**
* 查询知识库附件
*/
@Override
public KnowledgeAttachVo queryById(Long id) {
return baseMapper.selectVoById(id);
}
/**
* 查询知识库附件列表
*/
@Override
public TableDataInfo<KnowledgeAttachVo> queryPageList(KnowledgeAttachBo bo, PageQuery pageQuery) {
LambdaQueryWrapper<KnowledgeAttach> lqw = buildQueryWrapper(bo);
Page<KnowledgeAttachVo> result = baseMapper.selectVoPage(pageQuery.build(), lqw);
return TableDataInfo.build(result);
}
/**
* 查询知识库附件列表
*/
@Override
public List<KnowledgeAttachVo> queryList(KnowledgeAttachBo bo) {
LambdaQueryWrapper<KnowledgeAttach> lqw = buildQueryWrapper(bo);
return baseMapper.selectVoList(lqw);
}
private LambdaQueryWrapper<KnowledgeAttach> buildQueryWrapper(KnowledgeAttachBo bo) {
Map<String, Object> params = bo.getParams();
LambdaQueryWrapper<KnowledgeAttach> lqw = Wrappers.lambdaQuery();
lqw.eq(StringUtils.isNotBlank(bo.getKid()), KnowledgeAttach::getKid, bo.getKid());
lqw.eq(StringUtils.isNotBlank(bo.getDocId()), KnowledgeAttach::getDocId, bo.getDocId());
lqw.like(StringUtils.isNotBlank(bo.getDocName()), KnowledgeAttach::getDocName, bo.getDocName());
lqw.eq(StringUtils.isNotBlank(bo.getDocType()), KnowledgeAttach::getDocType, bo.getDocType());
lqw.eq(StringUtils.isNotBlank(bo.getContent()), KnowledgeAttach::getContent, bo.getContent());
return lqw;
}
/**
* 新增知识库附件
*/
@Override
public Boolean insertByBo(KnowledgeAttachBo bo) {
KnowledgeAttach add = MapstructUtils.convert(bo, KnowledgeAttach.class);
validEntityBeforeSave(add);
boolean flag = baseMapper.insert(add) > 0;
if (flag) {
bo.setId(add.getId());
/**
 * Looks up a single knowledge-base attachment by primary key.
 *
 * @param id primary key of the attachment
 * @return the view object returned by the mapper's {@code selectVoById}
 */
@Override
public KnowledgeAttachVo queryById(Long id) {
return baseMapper.selectVoById(id);
}
return flag;
}
/**
* 修改知识库附件
*/
@Override
public Boolean updateByBo(KnowledgeAttachBo bo) {
KnowledgeAttach update = MapstructUtils.convert(bo, KnowledgeAttach.class);
validEntityBeforeSave(update);
return baseMapper.updateById(update) > 0;
}
/**
* 保存前的数据校验
*/
private void validEntityBeforeSave(KnowledgeAttach entity) {
//TODO 做一些数据校验,如唯一约束
}
/**
* 批量删除知识库附件
*/
@Override
public Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid) {
if (isValid) {
//TODO 做一些业务上的校验,判断是否需要校验
/**
 * Returns one page of knowledge-base attachments matching the given filter.
 *
 * @param bo        filter values; see {@code buildQueryWrapper} for the fields used
 * @param pageQuery paging parameters
 * @return the page wrapped as table data
 */
@Override
public TableDataInfo<KnowledgeAttachVo> queryPageList(KnowledgeAttachBo bo, PageQuery pageQuery) {
    // Build the filter first, then run the paged query with it.
    LambdaQueryWrapper<KnowledgeAttach> wrapper = buildQueryWrapper(bo);
    Page<KnowledgeAttachVo> page = baseMapper.selectVoPage(pageQuery.build(), wrapper);
    return TableDataInfo.build(page);
}
return baseMapper.deleteBatchIds(ids) > 0;
}
@Override
public void removeKnowledgeAttach(String docId) {
Map<String, Object> map = new HashMap<>();
map.put("doc_id", docId);
baseMapper.deleteByMap(map);
fragmentMapper.deleteByMap(map);
}
/**
 * Returns every knowledge-base attachment matching the given filter (no paging).
 *
 * @param bo filter values; see {@code buildQueryWrapper} for the fields used
 * @return matching attachments as view objects
 */
@Override
public List<KnowledgeAttachVo> queryList(KnowledgeAttachBo bo) {
    return baseMapper.selectVoList(buildQueryWrapper(bo));
}
@Override
public String translationByFile(MultipartFile file, String targetLanguage) {
return "接口开发中!";
}
/**
 * Builds the query conditions for attachment look-ups from the non-blank fields of {@code bo}.
 * <p>
 * Knowledge-base id, document id, document type and content are matched with equality;
 * the document name is matched with {@code LIKE}. Blank fields add no condition.
 *
 * @param bo filter values
 * @return the assembled lambda query wrapper
 */
private LambdaQueryWrapper<KnowledgeAttach> buildQueryWrapper(KnowledgeAttachBo bo) {
    LambdaQueryWrapper<KnowledgeAttach> lqw = Wrappers.lambdaQuery();
    lqw.eq(StringUtils.isNotBlank(bo.getKid()), KnowledgeAttach::getKid, bo.getKid());
    lqw.eq(StringUtils.isNotBlank(bo.getDocId()), KnowledgeAttach::getDocId, bo.getDocId());
    lqw.like(StringUtils.isNotBlank(bo.getDocName()), KnowledgeAttach::getDocName, bo.getDocName());
    lqw.eq(StringUtils.isNotBlank(bo.getDocType()), KnowledgeAttach::getDocType, bo.getDocType());
    lqw.eq(StringUtils.isNotBlank(bo.getContent()), KnowledgeAttach::getContent, bo.getContent());
    return lqw;
}
/**
 * Inserts a new knowledge-base attachment.
 * <p>
 * On success the generated id is written back to {@code bo}.
 *
 * @param bo business object to persist
 * @return {@code true} if a row was inserted, otherwise {@code false}
 */
@Override
public Boolean insertByBo(KnowledgeAttachBo bo) {
    KnowledgeAttach entity = MapstructUtils.convert(bo, KnowledgeAttach.class);
    validEntityBeforeSave(entity);
    if (baseMapper.insert(entity) <= 0) {
        return false;
    }
    // Hand the id assigned during insert back to the caller's object.
    bo.setId(entity.getId());
    return true;
}
/**
 * Updates an existing knowledge-base attachment by primary key.
 *
 * @param bo business object carrying the new values
 * @return {@code true} if at least one row was updated
 */
@Override
public Boolean updateByBo(KnowledgeAttachBo bo) {
    KnowledgeAttach entity = MapstructUtils.convert(bo, KnowledgeAttach.class);
    validEntityBeforeSave(entity);
    int affectedRows = baseMapper.updateById(entity);
    return affectedRows > 0;
}
/**
 * Pre-save validation hook; currently performs no checks.
 *
 * @param entity the entity about to be inserted or updated
 */
private void validEntityBeforeSave(KnowledgeAttach entity) {
// TODO: add data validation, e.g. unique-constraint checks
}
/**
 * Deletes knowledge-base attachments in batch by primary key.
 *
 * @param ids     primary keys of the attachments to delete
 * @param isValid whether business validation should run before deleting;
 *                {@code null} is treated like {@code true} instead of failing on unboxing
 * @return {@code true} if at least one row was deleted
 */
@Override
public Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid) {
    // Compare against Boolean.FALSE so a null flag cannot cause an unboxing NullPointerException.
    if (!Boolean.FALSE.equals(isValid)) {
        // TODO: add business validation and decide whether it is required
    }
    return baseMapper.deleteBatchIds(ids) > 0;
}
/**
 * Removes an attachment and its fragments, both matched by document id.
 *
 * @param docId document id matched against the {@code doc_id} key
 */
@Override
public void removeKnowledgeAttach(String docId) {
    // A single equality condition shared by both delete calls.
    Map<String, Object> condition = new HashMap<>();
    condition.put("doc_id", docId);
    // Attachment rows first, then the fragment rows carrying the same doc_id.
    baseMapper.deleteByMap(condition);
    fragmentMapper.deleteByMap(condition);
}
/**
 * Placeholder implementation: ignores both arguments and returns a fixed
 * "interface under development" message.
 *
 * @param file           the file to translate (currently unused)
 * @param targetLanguage the target language (currently unused)
 * @return a constant placeholder message
 */
@Override
public String translationByFile(MultipartFile file, String targetLanguage) {
return "接口开发中!";
}
}

View File

@@ -35,7 +35,7 @@ public class KnowledgeFragmentServiceImpl implements IKnowledgeFragmentService {
* 查询知识片段
*/
@Override
public KnowledgeFragmentVo queryById(Long id){
public KnowledgeFragmentVo queryById(Long id) {
return baseMapper.selectVoById(id);
}
@@ -96,7 +96,7 @@ public class KnowledgeFragmentServiceImpl implements IKnowledgeFragmentService {
/**
* 保存前的数据校验
*/
private void validEntityBeforeSave(KnowledgeFragment entity){
private void validEntityBeforeSave(KnowledgeFragment entity) {
//TODO 做一些数据校验,如唯一约束
}
@@ -105,7 +105,7 @@ public class KnowledgeFragmentServiceImpl implements IKnowledgeFragmentService {
*/
@Override
public Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid) {
if(isValid){
if (isValid) {
//TODO 做一些业务上的校验,判断是否需要校验
}
return baseMapper.deleteBatchIds(ids) > 0;

View File

@@ -8,7 +8,8 @@ import org.ruoyi.service.VectorStoreService;
import org.ruoyi.service.strategy.VectorStoreStrategyFactory;
import org.springframework.context.annotation.Primary;
import org.springframework.stereotype.Service;
import java.util.*;
import java.util.List;
/**
* 向量库管理

View File

@@ -1,16 +1,12 @@
package org.ruoyi.service.strategy;
import org.ruoyi.common.core.exception.ServiceException;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.model.ollama.OllamaEmbeddingModel;
import dev.langchain4j.model.openai.OpenAiEmbeddingModel;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.ruoyi.common.core.config.VectorStoreProperties;
import org.ruoyi.common.core.utils.StringUtils;
import org.ruoyi.service.VectorStoreService;
import org.ruoyi.embedding.EmbeddingModelFactory;
import org.ruoyi.service.VectorStoreService;
/**
* 向量库策略抽象基类
@@ -24,15 +20,7 @@ public abstract class AbstractVectorStoreStrategy implements VectorStoreService
protected final VectorStoreProperties vectorStoreProperties;
private final EmbeddingModelFactory embeddingModelFactory;
/**
* 获取向量模型
*/
@SneakyThrows
protected EmbeddingModel getEmbeddingModel(String modelName, Integer dimension) {
return embeddingModelFactory.createModel(modelName, dimension);
}
private final EmbeddingModelFactory embeddingModelFactory;
/**
* 将float数组转换为Float对象数组
@@ -45,6 +33,14 @@ public abstract class AbstractVectorStoreStrategy implements VectorStoreService
return result;
}
/**
 * Obtains the embedding model by delegating to the embedding model factory.
 * <p>
 * Annotated with {@code @SneakyThrows}, so checked exceptions raised by the factory
 * propagate without being declared in the signature.
 *
 * @param modelName name of the embedding model
 * @param dimension vector dimension passed through to the factory; callers visible in
 *                  this file pass {@code null}
 * @return the model created by the factory
 */
@SneakyThrows
protected EmbeddingModel getEmbeddingModel(String modelName, Integer dimension) {
return embeddingModelFactory.createModel(modelName, dimension);
}
/**
* 获取向量库类型标识
*/

View File

@@ -4,9 +4,9 @@ import jakarta.annotation.PostConstruct;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.ruoyi.common.core.config.VectorStoreProperties;
import org.ruoyi.service.VectorStoreService;
import org.ruoyi.service.strategy.impl.MilvusVectorStoreStrategy;
import org.ruoyi.service.strategy.impl.WeaviateVectorStoreStrategy;
import org.ruoyi.service.VectorStoreService;
import org.springframework.stereotype.Component;
import java.util.HashMap;

View File

@@ -1,7 +1,6 @@
package org.ruoyi.service.strategy.impl;
import cn.hutool.json.JSONObject;
import org.ruoyi.common.core.exception.ServiceException;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.model.embedding.EmbeddingModel;
import io.weaviate.client.Config;
@@ -18,12 +17,17 @@ import io.weaviate.client.v1.schema.model.WeaviateClass;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.ruoyi.common.core.config.VectorStoreProperties;
import org.ruoyi.common.core.exception.ServiceException;
import org.ruoyi.domain.bo.QueryVectorBo;
import org.ruoyi.domain.bo.StoreEmbeddingBo;
import org.ruoyi.embedding.EmbeddingModelFactory;
import org.ruoyi.service.strategy.AbstractVectorStoreStrategy;
import org.springframework.stereotype.Component;
import java.util.*;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
/**
* Weaviate向量库策略实现
@@ -85,8 +89,8 @@ public class WeaviateVectorStoreStrategy extends AbstractVectorStoreStrategy {
@Override
public void storeEmbeddings(StoreEmbeddingBo storeEmbeddingBo) {
createSchema(storeEmbeddingBo.getKid(),storeEmbeddingBo.getEmbeddingModelName());
EmbeddingModel embeddingModel = getEmbeddingModel(storeEmbeddingBo.getEmbeddingModelName(), null);
createSchema(storeEmbeddingBo.getKid(), storeEmbeddingBo.getEmbeddingModelName());
EmbeddingModel embeddingModel = getEmbeddingModel(storeEmbeddingBo.getEmbeddingModelName(), null);
List<String> chunkList = storeEmbeddingBo.getChunkList();
List<String> fidList = storeEmbeddingBo.getFids();
String kid = storeEmbeddingBo.getKid();
@@ -115,11 +119,10 @@ public class WeaviateVectorStoreStrategy extends AbstractVectorStoreStrategy {
}
@Override
public List<String> getQueryVector(QueryVectorBo queryVectorBo) {
createSchema(queryVectorBo.getKid(),queryVectorBo.getEmbeddingModelName());
EmbeddingModel embeddingModel = getEmbeddingModel(queryVectorBo.getEmbeddingModelName(),null);
createSchema(queryVectorBo.getKid(), queryVectorBo.getEmbeddingModelName());
EmbeddingModel embeddingModel = getEmbeddingModel(queryVectorBo.getEmbeddingModelName(), null);
Embedding queryEmbedding = embeddingModel.embed(queryVectorBo.getQuery()).content();
float[] vector = queryEmbedding.vector();
List<String> vectorStrings = new ArrayList<>();
@@ -128,7 +131,7 @@ public class WeaviateVectorStoreStrategy extends AbstractVectorStoreStrategy {
}
String vectorStr = String.join(",", vectorStrings);
String className = vectorStoreProperties.getWeaviate().getClassname();
// 构建 GraphQL 查询
String graphQLQuery = String.format(
"{\n" +

View File

@@ -4,6 +4,8 @@
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<mapper namespace="org.ruoyi.mapper.KnowledgeInfoMapper">
<select id="selectByKid" resultType="org.ruoyi.domain.KnowledgeInfo">
SELECT * FROM knowledge_info WHERE kid = #{kid}
SELECT *
FROM knowledge_info
WHERE kid = #{kid}
</select>
</mapper>