mirror of
https://gitcode.com/ageerle/ruoyi-ai.git
synced 2026-03-24 18:13:43 +08:00
Merge branch 'main' into main
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
|
||||
@@ -16,11 +16,12 @@ import java.util.List;
|
||||
@Slf4j
|
||||
public class CodeFileLoader implements ResourceLoader {
|
||||
private final TextSplitter textSplitter;
|
||||
|
||||
@Override
|
||||
public String getContent(InputStream inputStream) {
|
||||
StringBuffer stringBuffer = new StringBuffer();
|
||||
try (InputStreamReader reader = new InputStreamReader(inputStream);
|
||||
BufferedReader bufferedReader = new BufferedReader(reader)){
|
||||
BufferedReader bufferedReader = new BufferedReader(reader)) {
|
||||
String line;
|
||||
while ((line = bufferedReader.readLine()) != null) {
|
||||
stringBuffer.append(line).append("\n");
|
||||
@@ -30,8 +31,9 @@ public class CodeFileLoader implements ResourceLoader {
|
||||
}
|
||||
return stringBuffer.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getChunkList(String content, String kid){
|
||||
public List<String> getChunkList(String content, String kid) {
|
||||
return textSplitter.split(content, kid);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12,12 +12,14 @@ import java.io.BufferedInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.List;
|
||||
|
||||
@Component
|
||||
@AllArgsConstructor
|
||||
@Slf4j
|
||||
public class ExcelFileLoader implements ResourceLoader {
|
||||
private final TextSplitter textSplitter;
|
||||
private static final int DEFAULT_BUFFER_SIZE = 8192;
|
||||
private final TextSplitter textSplitter;
|
||||
|
||||
@Override
|
||||
public String getContent(InputStream inputStream) {
|
||||
// 使用带缓冲的输入流包装(保持原流不自动关闭)
|
||||
|
||||
@@ -3,7 +3,7 @@ package org.ruoyi.chain.loader;
|
||||
import java.io.InputStream;
|
||||
import java.util.List;
|
||||
|
||||
public class FolderLoader implements ResourceLoader{
|
||||
public class FolderLoader implements ResourceLoader {
|
||||
@Override
|
||||
public String getContent(InputStream inputStream) {
|
||||
return null;
|
||||
|
||||
@@ -16,11 +16,12 @@ import java.util.List;
|
||||
@Slf4j
|
||||
public class MarkDownFileLoader implements ResourceLoader {
|
||||
private final TextSplitter textSplitter;
|
||||
|
||||
@Override
|
||||
public String getContent(InputStream inputStream) {
|
||||
StringBuffer stringBuffer = new StringBuffer();
|
||||
try (InputStreamReader reader = new InputStreamReader(inputStream);
|
||||
BufferedReader bufferedReader = new BufferedReader(reader)){
|
||||
BufferedReader bufferedReader = new BufferedReader(reader)) {
|
||||
String line;
|
||||
while ((line = bufferedReader.readLine()) != null) {
|
||||
stringBuffer.append(line).append("\n");
|
||||
@@ -30,8 +31,9 @@ public class MarkDownFileLoader implements ResourceLoader {
|
||||
}
|
||||
return stringBuffer.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getChunkList(String content, String kid){
|
||||
public List<String> getChunkList(String content, String kid) {
|
||||
return textSplitter.split(content, kid);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,6 +14,7 @@ import java.util.List;
|
||||
@AllArgsConstructor
|
||||
public class PdfFileLoader implements ResourceLoader {
|
||||
private final TextSplitter characterTextSplitter;
|
||||
|
||||
@Override
|
||||
public String getContent(InputStream inputStream) {
|
||||
PDDocument document = null;
|
||||
|
||||
@@ -14,7 +14,6 @@ import lombok.AllArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.commons.io.FilenameUtils;
|
||||
import org.ruoyi.chain.split.TextSplitter;
|
||||
import org.ruoyi.common.core.utils.StringUtils;
|
||||
import org.ruoyi.common.core.utils.file.FileUtils;
|
||||
import org.ruoyi.common.oss.core.OssClient;
|
||||
import org.ruoyi.common.oss.entity.UploadResult;
|
||||
@@ -44,11 +43,11 @@ import java.util.regex.Pattern;
|
||||
@Component
|
||||
@AllArgsConstructor
|
||||
public class PdfMinerUFileLoader implements ResourceLoader {
|
||||
// 预编译正则表达式
|
||||
private static final Pattern MD_IMAGE_PATTERN = Pattern.compile("!\\[(.*?)]\\((.*?)(\\s*=\\d+)?\\)");
|
||||
private final TextSplitter characterTextSplitter;
|
||||
private final PdfProperties properties;
|
||||
private final SysOssMapper sysOssMapper;
|
||||
// 预编译正则表达式
|
||||
private static final Pattern MD_IMAGE_PATTERN = Pattern.compile("!\\[(.*?)]\\((.*?)(\\s*=\\d+)?\\)");
|
||||
// OCR图片识别线程池
|
||||
private final ThreadPoolExecutor ocrExecutor = new ThreadPoolExecutor(
|
||||
// 核心线程数
|
||||
@@ -63,6 +62,184 @@ public class PdfMinerUFileLoader implements ResourceLoader {
|
||||
new ThreadPoolExecutor.CallerRunsPolicy()
|
||||
);
|
||||
|
||||
/**
|
||||
* 创建临时PDF文件
|
||||
*
|
||||
* @param is 输入流
|
||||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
private static File createTempFile(InputStream is) throws IOException {
|
||||
File tempFile = File.createTempFile("upload_", ".pdf");
|
||||
Files.copy(is, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
|
||||
return tempFile;
|
||||
}
|
||||
|
||||
/**
|
||||
* 构建跨平台文件输出路径
|
||||
*
|
||||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
private static Path buildOutputPath() throws IOException {
|
||||
Path basePath = isWindows() ?
|
||||
// Windows C盘用户路径下 minerUOutPut,避免其他盘符权限问题
|
||||
Paths.get(System.getProperty("user.home")).resolve("minerUOutPut") :
|
||||
Paths.get("/var/minerUOutPut");
|
||||
|
||||
if (!Files.exists(basePath)) {
|
||||
Files.createDirectories(basePath);
|
||||
}
|
||||
return basePath;
|
||||
}
|
||||
|
||||
/**
|
||||
* 判断当前操作系统是否为Windows
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
private static boolean isWindows() {
|
||||
return System.getProperty("os.name").toLowerCase().contains("win");
|
||||
}
|
||||
|
||||
/**
|
||||
* 执行命令
|
||||
*
|
||||
* @param condaEnv conda环境路径
|
||||
* @param inputFile 输入文件
|
||||
* @param outputPath 输出路径
|
||||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
private static Process buildProcess(String condaEnv, File inputFile, Path outputPath) throws IOException {
|
||||
ProcessBuilder pb = new ProcessBuilder();
|
||||
String[] command;
|
||||
|
||||
if (isWindows()) {
|
||||
command = new String[]{
|
||||
"cmd", "/c",
|
||||
"call", "conda", "activate",
|
||||
condaEnv.replace("\"", ""),
|
||||
"&&", "magic-pdf",
|
||||
"-p", inputFile.getAbsolutePath(),
|
||||
"-o", outputPath.toString()
|
||||
};
|
||||
} else {
|
||||
command = new String[]{
|
||||
"bash", "-c",
|
||||
String.format("source '%s/bin/activate' && magic-pdf -p '%s' -o '%s'",
|
||||
condaEnv,
|
||||
inputFile.getAbsolutePath(),
|
||||
outputPath.toString())
|
||||
};
|
||||
}
|
||||
|
||||
return pb.command(command)
|
||||
.redirectErrorStream(true)
|
||||
.start();
|
||||
}
|
||||
|
||||
/**
|
||||
* 实时日志输出
|
||||
*
|
||||
* @param process 进程
|
||||
*/
|
||||
private static void logProcessOutput(Process process) {
|
||||
Executors.newSingleThreadExecutor().submit(() -> {
|
||||
try (BufferedReader reader = new BufferedReader(
|
||||
new InputStreamReader(process.getInputStream()))) {
|
||||
String line;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
log.info("[PROCESS LOG] " + line);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* 验证转换结果
|
||||
*
|
||||
* @param inputFile 输入文件
|
||||
* @param outputPath 输出路径
|
||||
* @param exitCode 退出码
|
||||
* @return
|
||||
*/
|
||||
private static String verifyResult(File inputFile, Path outputPath, int exitCode) {
|
||||
String baseName = FilenameUtils.removeExtension(inputFile.getName());
|
||||
Path expectedMd = outputPath
|
||||
.resolve(baseName)
|
||||
.resolve("auto")
|
||||
.resolve(baseName + ".md");
|
||||
|
||||
if (exitCode == 0 && Files.exists(expectedMd)) {
|
||||
log.info("转换成功:{}", expectedMd.toString());
|
||||
return expectedMd.toString();
|
||||
}
|
||||
return String.format("转换失败(退出码%d)| 预期文件:%s", exitCode, expectedMd);
|
||||
}
|
||||
|
||||
/**
|
||||
* 多模态OCR识别图片内容
|
||||
*
|
||||
* @param imageUrl 图片URL
|
||||
* @return
|
||||
*/
|
||||
private static String imageUrlOCR(String imageUrl) {
|
||||
OpenAiChatModel model = OpenAiChatModel.builder()
|
||||
.apiKey("demo")
|
||||
.modelName("gpt-4o-mini")
|
||||
.baseUrl("http://langchain4j.dev/demo/openai/v1")
|
||||
.build();
|
||||
|
||||
UserMessage userMessage = UserMessage.from(
|
||||
TextContent.from(
|
||||
"请按以下逻辑处理图片:\n" +
|
||||
"1. 文字检测:识别图中所有可见文字(包括水印/标签),若无文字则跳至步骤3\n" +
|
||||
"2. 文字处理:\n" +
|
||||
" a. 对识别到的文字进行❗核心信息提炼\n" +
|
||||
" b. ❗禁止直接输出原文内容\n" +
|
||||
" c. 描述文字位置(如'顶部居中')、字体特征(颜色/大小)\n" +
|
||||
"3. 视觉描述:\n" +
|
||||
" a. 若无文字则用❗50字内简洁描述主体对象、场景、色彩搭配与画面氛围\n" +
|
||||
" b. 若有文字则补充说明文字与画面的关系\n" +
|
||||
"4. 输出规则:\n" +
|
||||
" - 最终输出为纯文本,格式:'[文字总结] 视觉描述 关键词:xx,xx'\n" +
|
||||
" - 关键词从内容中提取3个最具代表性的名词\n" +
|
||||
" - 无文字时格式:'[空] 简洁描述 关键词:xx,xx'"
|
||||
),
|
||||
ImageContent.from(imageUrl)
|
||||
);
|
||||
ChatResponse chat = model.chat(userMessage);
|
||||
AiMessage answer = chat.aiMessage();
|
||||
return answer.text();
|
||||
}
|
||||
|
||||
/**
|
||||
* 清理输出目录
|
||||
*
|
||||
* @param outputPath 输出目录
|
||||
*/
|
||||
private static void cleanOutputDirectory(Path outputPath) {
|
||||
if (Files.exists(outputPath)) {
|
||||
try {
|
||||
Files.walk(outputPath)
|
||||
// 按逆序删除(子目录先删)
|
||||
.sorted((p1, p2) -> -p1.compareTo(p2))
|
||||
.forEach(path -> {
|
||||
try {
|
||||
Files.delete(path);
|
||||
} catch (IOException e) {
|
||||
log.warn("清理输出目录失败: {}", path, e);
|
||||
}
|
||||
});
|
||||
} catch (IOException e) {
|
||||
log.error("遍历输出目录失败", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getContent(InputStream inputStream) {
|
||||
String content = "";
|
||||
@@ -127,127 +304,6 @@ public class PdfMinerUFileLoader implements ResourceLoader {
|
||||
return characterTextSplitter.split(content, kid);
|
||||
}
|
||||
|
||||
/**
|
||||
* 创建临时PDF文件
|
||||
*
|
||||
* @param is 输入流
|
||||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
private static File createTempFile(InputStream is) throws IOException {
|
||||
File tempFile = File.createTempFile("upload_", ".pdf");
|
||||
Files.copy(is, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
|
||||
return tempFile;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 构建跨平台文件输出路径
|
||||
*
|
||||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
private static Path buildOutputPath() throws IOException {
|
||||
Path basePath = isWindows() ?
|
||||
// Windows C盘用户路径下 minerUOutPut,避免其他盘符权限问题
|
||||
Paths.get(System.getProperty("user.home")).resolve("minerUOutPut") :
|
||||
Paths.get("/var/minerUOutPut");
|
||||
|
||||
if (!Files.exists(basePath)) {
|
||||
Files.createDirectories(basePath);
|
||||
}
|
||||
return basePath;
|
||||
}
|
||||
|
||||
/**
|
||||
* 判断当前操作系统是否为Windows
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
private static boolean isWindows() {
|
||||
return System.getProperty("os.name").toLowerCase().contains("win");
|
||||
}
|
||||
|
||||
/**
|
||||
* 执行命令
|
||||
*
|
||||
* @param condaEnv conda环境路径
|
||||
* @param inputFile 输入文件
|
||||
* @param outputPath 输出路径
|
||||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
private static Process buildProcess(String condaEnv, File inputFile, Path outputPath) throws IOException {
|
||||
ProcessBuilder pb = new ProcessBuilder();
|
||||
String[] command;
|
||||
|
||||
if (isWindows()) {
|
||||
command = new String[]{
|
||||
"cmd", "/c",
|
||||
"call", "conda", "activate",
|
||||
condaEnv.replace("\"", ""),
|
||||
"&&", "magic-pdf",
|
||||
"-p", inputFile.getAbsolutePath(),
|
||||
"-o", outputPath.toString()
|
||||
};
|
||||
} else {
|
||||
command = new String[]{
|
||||
"bash", "-c",
|
||||
String.format("source '%s/bin/activate' && magic-pdf -p '%s' -o '%s'",
|
||||
condaEnv,
|
||||
inputFile.getAbsolutePath(),
|
||||
outputPath.toString())
|
||||
};
|
||||
}
|
||||
|
||||
return pb.command(command)
|
||||
.redirectErrorStream(true)
|
||||
.start();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 实时日志输出
|
||||
*
|
||||
* @param process 进程
|
||||
*/
|
||||
private static void logProcessOutput(Process process) {
|
||||
Executors.newSingleThreadExecutor().submit(() -> {
|
||||
try (BufferedReader reader = new BufferedReader(
|
||||
new InputStreamReader(process.getInputStream()))) {
|
||||
String line;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
log.info("[PROCESS LOG] " + line);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* 验证转换结果
|
||||
*
|
||||
* @param inputFile 输入文件
|
||||
* @param outputPath 输出路径
|
||||
* @param exitCode 退出码
|
||||
* @return
|
||||
*/
|
||||
private static String verifyResult(File inputFile, Path outputPath, int exitCode) {
|
||||
String baseName = FilenameUtils.removeExtension(inputFile.getName());
|
||||
Path expectedMd = outputPath
|
||||
.resolve(baseName)
|
||||
.resolve("auto")
|
||||
.resolve(baseName + ".md");
|
||||
|
||||
if (exitCode == 0 && Files.exists(expectedMd)) {
|
||||
log.info("转换成功:{}", expectedMd.toString());
|
||||
return expectedMd.toString();
|
||||
}
|
||||
return String.format("转换失败(退出码%d)| 预期文件:%s", exitCode, expectedMd);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 正则匹配图片语法,多线程进行处理
|
||||
*
|
||||
@@ -300,7 +356,6 @@ public class PdfMinerUFileLoader implements ResourceLoader {
|
||||
return sb;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 图片处理任务
|
||||
*
|
||||
@@ -375,43 +430,6 @@ public class PdfMinerUFileLoader implements ResourceLoader {
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 多模态OCR识别图片内容
|
||||
*
|
||||
* @param imageUrl 图片URL
|
||||
* @return
|
||||
*/
|
||||
private static String imageUrlOCR(String imageUrl) {
|
||||
OpenAiChatModel model = OpenAiChatModel.builder()
|
||||
.apiKey("demo")
|
||||
.modelName("gpt-4o-mini")
|
||||
.baseUrl("http://langchain4j.dev/demo/openai/v1")
|
||||
.build();
|
||||
|
||||
UserMessage userMessage = UserMessage.from(
|
||||
TextContent.from(
|
||||
"请按以下逻辑处理图片:\n" +
|
||||
"1. 文字检测:识别图中所有可见文字(包括水印/标签),若无文字则跳至步骤3\n" +
|
||||
"2. 文字处理:\n" +
|
||||
" a. 对识别到的文字进行❗核心信息提炼\n" +
|
||||
" b. ❗禁止直接输出原文内容\n" +
|
||||
" c. 描述文字位置(如'顶部居中')、字体特征(颜色/大小)\n" +
|
||||
"3. 视觉描述:\n" +
|
||||
" a. 若无文字则用❗50字内简洁描述主体对象、场景、色彩搭配与画面氛围\n" +
|
||||
" b. 若有文字则补充说明文字与画面的关系\n" +
|
||||
"4. 输出规则:\n" +
|
||||
" - 最终输出为纯文本,格式:'[文字总结] 视觉描述 关键词:xx,xx'\n" +
|
||||
" - 关键词从内容中提取3个最具代表性的名词\n" +
|
||||
" - 无文字时格式:'[空] 简洁描述 关键词:xx,xx'"
|
||||
),
|
||||
ImageContent.from(imageUrl)
|
||||
);
|
||||
ChatResponse chat = model.chat(userMessage);
|
||||
AiMessage answer = chat.aiMessage();
|
||||
return answer.text();
|
||||
}
|
||||
|
||||
/**
|
||||
* 静态内部类保存图片匹配信息
|
||||
*/
|
||||
@@ -421,29 +439,4 @@ public class PdfMinerUFileLoader implements ResourceLoader {
|
||||
int start; // 匹配起始位置
|
||||
int end; // 匹配结束位置
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 清理输出目录
|
||||
*
|
||||
* @param outputPath 输出目录
|
||||
*/
|
||||
private static void cleanOutputDirectory(Path outputPath) {
|
||||
if (Files.exists(outputPath)) {
|
||||
try {
|
||||
Files.walk(outputPath)
|
||||
// 按逆序删除(子目录先删)
|
||||
.sorted((p1, p2) -> -p1.compareTo(p2))
|
||||
.forEach(path -> {
|
||||
try {
|
||||
Files.delete(path);
|
||||
} catch (IOException e) {
|
||||
log.warn("清理输出目录失败: {}", path, e);
|
||||
}
|
||||
});
|
||||
} catch (IOException e) {
|
||||
log.error("遍历输出目录失败", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,11 +2,7 @@ package org.ruoyi.chain.loader;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import org.ruoyi.chain.split.*;
|
||||
|
||||
import org.ruoyi.config.properties.PdfProperties;
|
||||
import org.ruoyi.chain.split.CharacterTextSplitter;
|
||||
import org.ruoyi.chain.split.CodeTextSplitter;
|
||||
import org.ruoyi.chain.split.MarkdownTextSplitter;
|
||||
import org.ruoyi.constant.FileType;
|
||||
import org.ruoyi.system.mapper.SysOssMapper;
|
||||
import org.springframework.stereotype.Component;
|
||||
@@ -23,22 +19,22 @@ public class ResourceLoaderFactory {
|
||||
private final SysOssMapper sysOssMapper;
|
||||
|
||||
|
||||
public ResourceLoader getLoaderByFileType(String fileType){
|
||||
if (FileType.isTextFile(fileType)){
|
||||
public ResourceLoader getLoaderByFileType(String fileType) {
|
||||
if (FileType.isTextFile(fileType)) {
|
||||
return new TextFileLoader(characterTextSplitter);
|
||||
} else if (FileType.isWord(fileType)) {
|
||||
return new WordLoader(characterTextSplitter);
|
||||
} else if (FileType.isPdf(fileType) && pdfProperties.getTransition().isEnableMinerU()) {
|
||||
return new PdfMinerUFileLoader(characterTextSplitter,pdfProperties,sysOssMapper);
|
||||
return new PdfMinerUFileLoader(characterTextSplitter, pdfProperties, sysOssMapper);
|
||||
} else if (FileType.isPdf(fileType)) {
|
||||
return new PdfFileLoader(characterTextSplitter);
|
||||
}else if (FileType.isMdFile(fileType)) {
|
||||
} else if (FileType.isMdFile(fileType)) {
|
||||
return new MarkDownFileLoader(markdownTextSplitter);
|
||||
}else if (FileType.isCodeFile(fileType)) {
|
||||
} else if (FileType.isCodeFile(fileType)) {
|
||||
return new CodeFileLoader(codeTextSplitter);
|
||||
} else if (FileType.isExcel(fileType)) {
|
||||
return new ExcelFileLoader(excelTextSplitter);
|
||||
}else {
|
||||
} else {
|
||||
return new TextFileLoader(characterTextSplitter);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -9,29 +9,30 @@ import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Component
|
||||
@AllArgsConstructor
|
||||
@Slf4j
|
||||
public class TextFileLoader implements ResourceLoader{
|
||||
public class TextFileLoader implements ResourceLoader {
|
||||
private final TextSplitter textSplitter;
|
||||
|
||||
@Override
|
||||
public String getContent(InputStream inputStream) {
|
||||
StringBuffer stringBuffer = new StringBuffer();
|
||||
try (InputStreamReader reader = new InputStreamReader(inputStream, "UTF-8");
|
||||
BufferedReader bufferedReader = new BufferedReader(reader)){
|
||||
String line;
|
||||
while ((line = bufferedReader.readLine()) != null) {
|
||||
stringBuffer.append(line).append("\n");
|
||||
}
|
||||
String stringBuffer = "";
|
||||
try (InputStreamReader reader = new InputStreamReader(inputStream, StandardCharsets.UTF_8);
|
||||
BufferedReader bufferedReader = new BufferedReader(reader)) {
|
||||
stringBuffer = bufferedReader.lines().collect(Collectors.joining());
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
return stringBuffer.toString();
|
||||
return stringBuffer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getChunkList(String content, String kid){
|
||||
public List<String> getChunkList(String content, String kid) {
|
||||
return textSplitter.split(content, kid);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,6 +16,7 @@ import java.util.List;
|
||||
@Slf4j
|
||||
public class WordLoader implements ResourceLoader {
|
||||
private final TextSplitter textSplitter;
|
||||
|
||||
@Override
|
||||
public String getContent(InputStream inputStream) {
|
||||
XWPFDocument document = null;
|
||||
|
||||
@@ -1,12 +1,8 @@
|
||||
package org.ruoyi.chain.split;
|
||||
|
||||
import jakarta.annotation.Resource;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.ruoyi.common.core.utils.StringUtils;
|
||||
import org.ruoyi.domain.vo.KnowledgeInfoVo;
|
||||
import org.ruoyi.service.IKnowledgeInfoService;
|
||||
import org.springframework.context.annotation.Lazy;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.ArrayList;
|
||||
@@ -16,7 +12,7 @@ import java.util.List;
|
||||
@Component
|
||||
@AllArgsConstructor
|
||||
@Slf4j
|
||||
public class ExcelTextSplitter implements TextSplitter{
|
||||
public class ExcelTextSplitter implements TextSplitter {
|
||||
|
||||
|
||||
@Override
|
||||
|
||||
@@ -9,7 +9,7 @@ import java.util.List;
|
||||
@Component
|
||||
@AllArgsConstructor
|
||||
@Slf4j
|
||||
public class MarkdownTextSplitter implements TextSplitter{
|
||||
public class MarkdownTextSplitter implements TextSplitter {
|
||||
@Override
|
||||
public List<String> split(String content, String kid) {
|
||||
return null;
|
||||
|
||||
@@ -39,62 +39,56 @@ public class FileType {
|
||||
public static final String YAML = "yaml";
|
||||
public static final String YML = "yml";
|
||||
|
||||
public static boolean isTextFile(String type){
|
||||
public static boolean isTextFile(String type) {
|
||||
if (type.equalsIgnoreCase(TXT) || type.equalsIgnoreCase(CSV) || type.equalsIgnoreCase(PROPERTIES)
|
||||
|| type.equalsIgnoreCase(INI) || type.equalsIgnoreCase(YAML) || type.equalsIgnoreCase(YML)
|
||||
|| type.equalsIgnoreCase(LOG) || type.equalsIgnoreCase(XML)){
|
||||
|| type.equalsIgnoreCase(LOG) || type.equalsIgnoreCase(XML)) {
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public static boolean isCodeFile(String type){
|
||||
public static boolean isCodeFile(String type) {
|
||||
if (type.equalsIgnoreCase(JAVA) || type.equalsIgnoreCase(HTML) || type.equalsIgnoreCase(HTM) || type.equalsIgnoreCase(JS) || type.equalsIgnoreCase(PY)
|
||||
|| type.equalsIgnoreCase(CPP) || type.equalsIgnoreCase(SQL) || type.equalsIgnoreCase(PHP) || type.equalsIgnoreCase(RUBY)
|
||||
|| type.equalsIgnoreCase(C) || type.equalsIgnoreCase(H) || type.equalsIgnoreCase(HPP) || type.equalsIgnoreCase(SWIFT)
|
||||
|| type.equalsIgnoreCase(TS) || type.equalsIgnoreCase(RUST) || type.equalsIgnoreCase(PERL) || type.equalsIgnoreCase(SHELL)
|
||||
|| type.equalsIgnoreCase(BAT) || type.equalsIgnoreCase(CMD) || type.equalsIgnoreCase(CSS)){
|
||||
|| type.equalsIgnoreCase(BAT) || type.equalsIgnoreCase(CMD) || type.equalsIgnoreCase(CSS)) {
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public static boolean isMdFile(String type){
|
||||
if (type.equalsIgnoreCase(MD)){
|
||||
public static boolean isMdFile(String type) {
|
||||
if (type.equalsIgnoreCase(MD)) {
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public static boolean isWord(String type){
|
||||
if (type.equalsIgnoreCase(DOC) || type.equalsIgnoreCase(DOCX)){
|
||||
public static boolean isWord(String type) {
|
||||
if (type.equalsIgnoreCase(DOC) || type.equalsIgnoreCase(DOCX)) {
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public static boolean isPdf(String type){
|
||||
if (type.equalsIgnoreCase(PDF)){
|
||||
public static boolean isPdf(String type) {
|
||||
if (type.equalsIgnoreCase(PDF)) {
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
public static boolean isExcel(String type){
|
||||
if (type.equalsIgnoreCase(XLS) || type.equalsIgnoreCase(XLSX)){
|
||||
public static boolean isExcel(String type) {
|
||||
if (type.equalsIgnoreCase(XLS) || type.equalsIgnoreCase(XLSX)) {
|
||||
return true;
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -19,66 +19,65 @@ import java.io.Serial;
|
||||
@TableName("knowledge_attach")
|
||||
public class KnowledgeAttach extends BaseEntity {
|
||||
|
||||
@Serial
|
||||
private static final long serialVersionUID = 1L;
|
||||
@Serial
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
@TableId(value = "id")
|
||||
private Long id;
|
||||
/**
|
||||
*
|
||||
*/
|
||||
@TableId(value = "id")
|
||||
private Long id;
|
||||
|
||||
/**
|
||||
* 知识库ID
|
||||
*/
|
||||
private String kid;
|
||||
/**
|
||||
* 知识库ID
|
||||
*/
|
||||
private String kid;
|
||||
|
||||
/**
|
||||
* 文档ID
|
||||
*/
|
||||
private String docId;
|
||||
/**
|
||||
* 文档ID
|
||||
*/
|
||||
private String docId;
|
||||
|
||||
/**
|
||||
* 文档名称
|
||||
*/
|
||||
private String docName;
|
||||
/**
|
||||
* 文档名称
|
||||
*/
|
||||
private String docName;
|
||||
|
||||
/**
|
||||
* 文档类型
|
||||
*/
|
||||
private String docType;
|
||||
/**
|
||||
* 文档类型
|
||||
*/
|
||||
private String docType;
|
||||
|
||||
/**
|
||||
* 文档内容
|
||||
*/
|
||||
private String content;
|
||||
/**
|
||||
* 文档内容
|
||||
*/
|
||||
private String content;
|
||||
|
||||
/**
|
||||
* 备注
|
||||
*/
|
||||
private String remark;
|
||||
/**
|
||||
* 备注
|
||||
*/
|
||||
private String remark;
|
||||
|
||||
|
||||
/**
|
||||
* 对象存储主键
|
||||
*/
|
||||
private Long ossId;
|
||||
/**
|
||||
* 对象存储主键
|
||||
*/
|
||||
private Long ossId;
|
||||
|
||||
|
||||
/**
|
||||
* 拆解图片状态10未开始,20进行中,30已完成
|
||||
*/
|
||||
private Integer picStatus;
|
||||
|
||||
/**
|
||||
* 拆解图片状态10未开始,20进行中,30已完成
|
||||
*/
|
||||
private Integer picStatus;
|
||||
/**
|
||||
* 分析图片状态10未开始,20进行中,30已完成
|
||||
*/
|
||||
private Integer picAnysStatus;
|
||||
|
||||
/**
|
||||
* 分析图片状态10未开始,20进行中,30已完成
|
||||
*/
|
||||
private Integer picAnysStatus;
|
||||
|
||||
/**
|
||||
* 写入向量数据库状态10未开始,20进行中,30已完成
|
||||
*/
|
||||
private Integer vectorStatus;
|
||||
/**
|
||||
* 写入向量数据库状态10未开始,20进行中,30已完成
|
||||
*/
|
||||
private Integer vectorStatus;
|
||||
|
||||
}
|
||||
|
||||
@@ -21,72 +21,72 @@ import org.ruoyi.domain.KnowledgeAttach;
|
||||
@AutoMapper(target = KnowledgeAttach.class, reverseConvertGenerate = false)
|
||||
public class KnowledgeAttachBo extends BaseEntity {
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
@NotNull(message = "不能为空", groups = {EditGroup.class})
|
||||
private Long id;
|
||||
/**
|
||||
*
|
||||
*/
|
||||
@NotNull(message = "不能为空", groups = {EditGroup.class})
|
||||
private Long id;
|
||||
|
||||
/**
|
||||
* 知识库ID
|
||||
*/
|
||||
@NotBlank(message = "知识库ID不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private String kid;
|
||||
/**
|
||||
* 知识库ID
|
||||
*/
|
||||
@NotBlank(message = "知识库ID不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private String kid;
|
||||
|
||||
/**
|
||||
* 文档ID
|
||||
*/
|
||||
@NotBlank(message = "文档ID不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private String docId;
|
||||
/**
|
||||
* 文档ID
|
||||
*/
|
||||
@NotBlank(message = "文档ID不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private String docId;
|
||||
|
||||
/**
|
||||
* 文档名称
|
||||
*/
|
||||
@NotBlank(message = "文档名称不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private String docName;
|
||||
/**
|
||||
* 文档名称
|
||||
*/
|
||||
@NotBlank(message = "文档名称不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private String docName;
|
||||
|
||||
/**
|
||||
* 文档类型
|
||||
*/
|
||||
@NotBlank(message = "文档类型不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private String docType;
|
||||
/**
|
||||
* 文档类型
|
||||
*/
|
||||
@NotBlank(message = "文档类型不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private String docType;
|
||||
|
||||
/**
|
||||
* 文档内容
|
||||
*/
|
||||
@NotBlank(message = "文档内容不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private String content;
|
||||
/**
|
||||
* 文档内容
|
||||
*/
|
||||
@NotBlank(message = "文档内容不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private String content;
|
||||
|
||||
/**
|
||||
* 备注
|
||||
*/
|
||||
@NotBlank(message = "备注不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private String remark;
|
||||
/**
|
||||
* 备注
|
||||
*/
|
||||
@NotBlank(message = "备注不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private String remark;
|
||||
|
||||
/**
|
||||
* 对象存储主键
|
||||
*/
|
||||
@NotNull(message = "对象存储主键不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private Long ossId;
|
||||
/**
|
||||
* 对象存储主键
|
||||
*/
|
||||
@NotNull(message = "对象存储主键不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private Long ossId;
|
||||
|
||||
|
||||
/**
|
||||
* 拆解图片状态10未开始,20进行中,30已完成
|
||||
*/
|
||||
@NotNull(message = "拆解图片状态10未开始,20进行中,30已完成不能为空", groups = { AddGroup.class, EditGroup.class })
|
||||
private Integer picStatus;
|
||||
/**
|
||||
* 拆解图片状态10未开始,20进行中,30已完成
|
||||
*/
|
||||
@NotNull(message = "拆解图片状态10未开始,20进行中,30已完成不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private Integer picStatus;
|
||||
|
||||
/**
|
||||
* 分析图片状态10未开始,20进行中,30已完成
|
||||
*/
|
||||
@NotNull(message = "分析图片状态10未开始,20进行中,30已完成不能为空", groups = { AddGroup.class, EditGroup.class })
|
||||
private Integer picAnysStatus;
|
||||
/**
|
||||
* 分析图片状态10未开始,20进行中,30已完成
|
||||
*/
|
||||
@NotNull(message = "分析图片状态10未开始,20进行中,30已完成不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private Integer picAnysStatus;
|
||||
|
||||
/**
|
||||
* 写入向量数据库状态10未开始,20进行中,30已完成
|
||||
*/
|
||||
@NotNull(message = "写入向量数据库状态10未开始,20进行中,30已完成不能为空", groups = { AddGroup.class, EditGroup.class })
|
||||
private Integer vectorStatus;
|
||||
/**
|
||||
* 写入向量数据库状态10未开始,20进行中,30已完成
|
||||
*/
|
||||
@NotNull(message = "写入向量数据库状态10未开始,20进行中,30已完成不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private Integer vectorStatus;
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -24,43 +24,43 @@ public class KnowledgeFragmentBo extends BaseEntity {
|
||||
/**
|
||||
*
|
||||
*/
|
||||
@NotNull(message = "不能为空", groups = { EditGroup.class })
|
||||
@NotNull(message = "不能为空", groups = {EditGroup.class})
|
||||
private Long id;
|
||||
|
||||
/**
|
||||
* 知识库ID
|
||||
*/
|
||||
@NotBlank(message = "知识库ID不能为空", groups = { AddGroup.class, EditGroup.class })
|
||||
@NotBlank(message = "知识库ID不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private String kid;
|
||||
|
||||
/**
|
||||
* 文档ID
|
||||
*/
|
||||
@NotBlank(message = "文档ID不能为空", groups = { AddGroup.class, EditGroup.class })
|
||||
@NotBlank(message = "文档ID不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private String docId;
|
||||
|
||||
/**
|
||||
* 知识片段ID
|
||||
*/
|
||||
@NotBlank(message = "知识片段ID不能为空", groups = { AddGroup.class, EditGroup.class })
|
||||
@NotBlank(message = "知识片段ID不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private String fid;
|
||||
|
||||
/**
|
||||
* 片段索引下标
|
||||
*/
|
||||
@NotNull(message = "片段索引下标不能为空", groups = { AddGroup.class, EditGroup.class })
|
||||
@NotNull(message = "片段索引下标不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private Long idx;
|
||||
|
||||
/**
|
||||
* 文档内容
|
||||
*/
|
||||
@NotBlank(message = "文档内容不能为空", groups = { AddGroup.class, EditGroup.class })
|
||||
@NotBlank(message = "文档内容不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private String content;
|
||||
|
||||
/**
|
||||
* 备注
|
||||
*/
|
||||
@NotBlank(message = "备注不能为空", groups = { AddGroup.class, EditGroup.class })
|
||||
@NotBlank(message = "备注不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private String remark;
|
||||
|
||||
|
||||
|
||||
@@ -22,33 +22,33 @@ import org.ruoyi.domain.KnowledgeInfo;
|
||||
public class KnowledgeInfoBo extends BaseEntity {
|
||||
|
||||
/**
|
||||
* 主键
|
||||
* 主键
|
||||
*/
|
||||
@NotNull(message = "不能为空", groups = { EditGroup.class })
|
||||
@NotNull(message = "不能为空", groups = {EditGroup.class})
|
||||
private Long id;
|
||||
|
||||
/**
|
||||
* 知识库ID
|
||||
*/
|
||||
@NotBlank(message = "知识库ID不能为空", groups = {EditGroup.class })
|
||||
@NotBlank(message = "知识库ID不能为空", groups = {EditGroup.class})
|
||||
private String kid;
|
||||
|
||||
/**
|
||||
* 用户ID
|
||||
*/
|
||||
@NotNull(message = "用户ID不能为空", groups = {EditGroup.class })
|
||||
@NotNull(message = "用户ID不能为空", groups = {EditGroup.class})
|
||||
private Long uid;
|
||||
|
||||
/**
|
||||
* 知识库名称
|
||||
*/
|
||||
@NotBlank(message = "知识库名称不能为空", groups = { AddGroup.class, EditGroup.class })
|
||||
@NotBlank(message = "知识库名称不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private String kname;
|
||||
|
||||
/**
|
||||
* 是否公开知识库(0 否 1是)
|
||||
*/
|
||||
@NotNull(message = "是否公开知识库(0 否 1是)不能为空", groups = { AddGroup.class, EditGroup.class })
|
||||
@NotNull(message = "是否公开知识库(0 否 1是)不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private Integer share;
|
||||
|
||||
/**
|
||||
@@ -74,19 +74,19 @@ public class KnowledgeInfoBo extends BaseEntity {
|
||||
/**
|
||||
* 知识库中检索的条数
|
||||
*/
|
||||
@NotNull(message = "知识库中检索的条数不能为空", groups = { AddGroup.class, EditGroup.class })
|
||||
@NotNull(message = "知识库中检索的条数不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private Long retrieveLimit;
|
||||
|
||||
/**
|
||||
* 文本块大小
|
||||
*/
|
||||
@NotNull(message = "文本块大小不能为空", groups = { AddGroup.class, EditGroup.class })
|
||||
@NotNull(message = "文本块大小不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private Long textBlockSize;
|
||||
|
||||
/**
|
||||
* 向量库模型名称
|
||||
*/
|
||||
@NotBlank(message = "向量库不能为空", groups = { AddGroup.class, EditGroup.class })
|
||||
@NotBlank(message = "向量库不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private String vectorModelName;
|
||||
|
||||
/**
|
||||
|
||||
@@ -5,6 +5,7 @@ import lombok.Data;
|
||||
|
||||
/**
|
||||
* 查询向量所需参数
|
||||
*
|
||||
* @author ageer
|
||||
*/
|
||||
@Data
|
||||
|
||||
@@ -6,6 +6,7 @@ import java.util.List;
|
||||
|
||||
/**
|
||||
* 保存向量所需参数
|
||||
*
|
||||
* @author ageer
|
||||
*/
|
||||
@Data
|
||||
|
||||
@@ -21,74 +21,73 @@ import java.io.Serializable;
|
||||
@AutoMapper(target = KnowledgeAttach.class)
|
||||
public class KnowledgeAttachVo implements Serializable {
|
||||
|
||||
@Serial
|
||||
private static final long serialVersionUID = 1L;
|
||||
@Serial
|
||||
private static final long serialVersionUID = 1L;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
@ExcelProperty(value = "")
|
||||
private Long id;
|
||||
/**
|
||||
*
|
||||
*/
|
||||
@ExcelProperty(value = "")
|
||||
private Long id;
|
||||
|
||||
/**
|
||||
* 知识库ID
|
||||
*/
|
||||
@ExcelProperty(value = "知识库ID")
|
||||
private String kid;
|
||||
/**
|
||||
* 知识库ID
|
||||
*/
|
||||
@ExcelProperty(value = "知识库ID")
|
||||
private String kid;
|
||||
|
||||
/**
|
||||
* 文档ID
|
||||
*/
|
||||
@ExcelProperty(value = "文档ID")
|
||||
private String docId;
|
||||
/**
|
||||
* 文档ID
|
||||
*/
|
||||
@ExcelProperty(value = "文档ID")
|
||||
private String docId;
|
||||
|
||||
/**
|
||||
* 文档名称
|
||||
*/
|
||||
@ExcelProperty(value = "文档名称")
|
||||
private String docName;
|
||||
/**
|
||||
* 文档名称
|
||||
*/
|
||||
@ExcelProperty(value = "文档名称")
|
||||
private String docName;
|
||||
|
||||
/**
|
||||
* 文档类型
|
||||
*/
|
||||
@ExcelProperty(value = "文档类型")
|
||||
private String docType;
|
||||
/**
|
||||
* 文档类型
|
||||
*/
|
||||
@ExcelProperty(value = "文档类型")
|
||||
private String docType;
|
||||
|
||||
/**
|
||||
* 文档内容
|
||||
*/
|
||||
@ExcelProperty(value = "文档内容")
|
||||
private String content;
|
||||
/**
|
||||
* 文档内容
|
||||
*/
|
||||
@ExcelProperty(value = "文档内容")
|
||||
private String content;
|
||||
|
||||
/**
|
||||
* 备注
|
||||
*/
|
||||
@ExcelProperty(value = "备注")
|
||||
private String remark;
|
||||
/**
|
||||
* 备注
|
||||
*/
|
||||
@ExcelProperty(value = "备注")
|
||||
private String remark;
|
||||
|
||||
/**
|
||||
* 对象存储主键
|
||||
*/
|
||||
@ExcelProperty(value = "对象存储主键")
|
||||
private Long ossId;
|
||||
/**
|
||||
* 对象存储主键
|
||||
*/
|
||||
@ExcelProperty(value = "对象存储主键")
|
||||
private Long ossId;
|
||||
|
||||
|
||||
/**
|
||||
* 拆解图片状态10未开始,20进行中,30已完成
|
||||
*/
|
||||
@ExcelProperty(value = "拆解图片状态10未开始,20进行中,30已完成")
|
||||
private Integer picStatus;
|
||||
|
||||
/**
|
||||
* 拆解图片状态10未开始,20进行中,30已完成
|
||||
*/
|
||||
@ExcelProperty(value = "拆解图片状态10未开始,20进行中,30已完成")
|
||||
private Integer picStatus;
|
||||
/**
|
||||
* 分析图片状态10未开始,20进行中,30已完成
|
||||
*/
|
||||
@ExcelProperty(value = "分析图片状态10未开始,20进行中,30已完成")
|
||||
private Integer picAnysStatus;
|
||||
|
||||
/**
|
||||
* 分析图片状态10未开始,20进行中,30已完成
|
||||
*/
|
||||
@ExcelProperty(value = "分析图片状态10未开始,20进行中,30已完成")
|
||||
private Integer picAnysStatus;
|
||||
|
||||
/**
|
||||
* 写入向量数据库状态10未开始,20进行中,30已完成
|
||||
*/
|
||||
@ExcelProperty(value = "写入向量数据库状态10未开始,20进行中,30已完成")
|
||||
private Integer vectorStatus;
|
||||
/**
|
||||
* 写入向量数据库状态10未开始,20进行中,30已完成
|
||||
*/
|
||||
@ExcelProperty(value = "写入向量数据库状态10未开始,20进行中,30已完成")
|
||||
private Integer vectorStatus;
|
||||
}
|
||||
|
||||
@@ -11,8 +11,6 @@ import java.io.Serial;
|
||||
import java.io.Serializable;
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* 知识片段视图对象 knowledge_fragment
|
||||
*
|
||||
|
||||
@@ -13,8 +13,6 @@ import java.io.Serial;
|
||||
import java.io.Serializable;
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* 知识库视图对象 knowledge_info
|
||||
*
|
||||
|
||||
@@ -13,12 +13,14 @@ import java.util.Set;
|
||||
public interface BaseEmbedModelService extends EmbeddingModel {
|
||||
/**
|
||||
* 根据配置信息配置嵌入模型
|
||||
*
|
||||
* @param config 包含模型配置信息的 ChatModelVo 对象
|
||||
*/
|
||||
void configure(ChatModelVo config);
|
||||
|
||||
/**
|
||||
* 获取当前嵌入模型支持的所有模态类型
|
||||
*
|
||||
* @return 返回支持的模态类型集合
|
||||
*/
|
||||
Set<ModalityType> getSupportedModalities();
|
||||
|
||||
@@ -34,7 +34,7 @@ public class EmbeddingModelFactory {
|
||||
* 如果模型已存在于缓存中,则直接返回;否则创建新的实例
|
||||
*
|
||||
* @param embeddingModelName 嵌入模型名称
|
||||
* @param dimension 模型维度大小
|
||||
* @param dimension 模型维度大小
|
||||
*/
|
||||
public BaseEmbedModelService createModel(String embeddingModelName, Integer dimension) {
|
||||
return modelCache.computeIfAbsent(embeddingModelName, name -> {
|
||||
@@ -81,7 +81,7 @@ public class EmbeddingModelFactory {
|
||||
* @param embeddingModelId 嵌入模型的唯一标识ID
|
||||
*/
|
||||
public void refreshModel(Long embeddingModelId) {
|
||||
// 从模型缓存中移除指定ID的模型
|
||||
// 从模型缓存中移除指定ID的模型
|
||||
modelCache.remove(embeddingModelId);
|
||||
}
|
||||
|
||||
@@ -100,7 +100,7 @@ public class EmbeddingModelFactory {
|
||||
* 根据提供的工厂名称和配置信息创建并配置模型实例
|
||||
*
|
||||
* @param factory 工厂名称,用于标识模型类型
|
||||
* @param config 模型配置信息
|
||||
* @param config 模型配置信息
|
||||
* @return BaseEmbedModelService 配置好的模型实例
|
||||
* @throws IllegalArgumentException 当无法获取指定的模型实例时抛出
|
||||
*/
|
||||
|
||||
@@ -12,6 +12,7 @@ import org.ruoyi.embedding.model.MultiModalInput;
|
||||
public interface MultiModalEmbedModelService extends BaseEmbedModelService {
|
||||
/**
|
||||
* 将图像数据转换为嵌入向量
|
||||
*
|
||||
* @param imageDataUrl 图像的地址,必须是公开可访问的URL
|
||||
* @return 包含嵌入向量的响应对象,可能包含状态信息和嵌入结果
|
||||
*/
|
||||
@@ -19,6 +20,7 @@ public interface MultiModalEmbedModelService extends BaseEmbedModelService {
|
||||
|
||||
/**
|
||||
* 将视频数据转换为嵌入向量
|
||||
*
|
||||
* @param videoDataUrl 视频的地址,必须是公开可访问的URL
|
||||
* @return 包含嵌入向量的响应对象,可能包含状态信息和嵌入结果
|
||||
*/
|
||||
|
||||
@@ -1,14 +1,50 @@
|
||||
package org.ruoyi.embedding.impl;
|
||||
|
||||
|
||||
import dev.langchain4j.community.model.dashscope.QwenEmbeddingModel;
|
||||
import dev.langchain4j.data.embedding.Embedding;
|
||||
import dev.langchain4j.data.segment.TextSegment;
|
||||
import dev.langchain4j.model.output.Response;
|
||||
import org.ruoyi.domain.vo.ChatModelVo;
|
||||
import org.ruoyi.embedding.model.ModalityType;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* @Author: Robust_H
|
||||
* @Date: 2025-09-30-下午3:00
|
||||
* @Description: 阿里百炼基础嵌入模型(兼容openai)
|
||||
*/
|
||||
@Component("alibailian")
|
||||
public class AliBaiLianBaseEmbedProvider extends OpenAiEmbeddingProvider{
|
||||
public class AliBaiLianBaseEmbedProvider extends OpenAiEmbeddingProvider {
|
||||
|
||||
|
||||
private ChatModelVo chatModelVo;
|
||||
|
||||
@Override
|
||||
public void configure(ChatModelVo config) {
|
||||
this.chatModelVo = config;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<ModalityType> getSupportedModalities() {
|
||||
return Set.of();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Response<List<Embedding>> embedAll(List<TextSegment> textSegments) {
|
||||
return QwenEmbeddingModel.builder()
|
||||
// todo 测试 后面要改
|
||||
// .baseUrl(chatModelVo.getApiHost())
|
||||
.apiKey(chatModelVo.getApiKey())
|
||||
.modelName(chatModelVo.getModelName())
|
||||
|
||||
.dimension(1024)
|
||||
// .dimension(chatModelVo.getDimension())
|
||||
.build()
|
||||
.embedAll(textSegments);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -26,9 +26,8 @@ import java.util.concurrent.TimeUnit;
|
||||
@Component("bailianMultiModel")
|
||||
@Slf4j
|
||||
public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelService {
|
||||
private ChatModelVo chatModelVo;
|
||||
|
||||
private final OkHttpClient okHttpClient;
|
||||
private ChatModelVo chatModelVo;
|
||||
|
||||
/**
|
||||
* 构造函数,初始化HTTP客户端
|
||||
@@ -44,6 +43,7 @@ public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelSer
|
||||
|
||||
/**
|
||||
* 图像嵌入向量生成
|
||||
*
|
||||
* @param imageDataUrl 图像数据的URL
|
||||
* @return 包含图像嵌入向量的Response对象
|
||||
*/
|
||||
@@ -54,6 +54,7 @@ public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelSer
|
||||
|
||||
/**
|
||||
* 视频嵌入向量生成
|
||||
*
|
||||
* @param videoDataUrl 视频数据的URL
|
||||
* @return 包含视频嵌入向量的Response对象
|
||||
*/
|
||||
@@ -65,6 +66,7 @@ public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelSer
|
||||
/**
|
||||
* 多模态嵌入向量生成
|
||||
* 支持同时处理文本、图像和视频等多种模态的数据
|
||||
*
|
||||
* @param input 包含多种模态输入的对象
|
||||
* @return 包含多模态嵌入向量的Response对象
|
||||
*/
|
||||
@@ -101,6 +103,7 @@ public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelSer
|
||||
|
||||
/**
|
||||
* 配置模型参数
|
||||
*
|
||||
* @param config 模型配置信息
|
||||
*/
|
||||
@Override
|
||||
@@ -110,6 +113,7 @@ public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelSer
|
||||
|
||||
/**
|
||||
* 获取支持的模态类型
|
||||
*
|
||||
* @return 支持的模态类型集合
|
||||
*/
|
||||
@Override
|
||||
@@ -119,6 +123,7 @@ public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelSer
|
||||
|
||||
/**
|
||||
* 批量文本嵌入向量生成
|
||||
*
|
||||
* @param textSegments 文本段列表
|
||||
* @return 包含所有文本嵌入向量的Response对象
|
||||
*/
|
||||
@@ -144,7 +149,8 @@ public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelSer
|
||||
|
||||
/**
|
||||
* 单模态嵌入(图片/视频/单条文本)复用方法
|
||||
* @param key 模态类型(image/video/text)
|
||||
*
|
||||
* @param key 模态类型(image/video/text)
|
||||
* @param dataUrl 数据URL
|
||||
* @return 包含嵌入向量的Response对象
|
||||
*/
|
||||
@@ -171,7 +177,8 @@ public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelSer
|
||||
|
||||
/**
|
||||
* 构建请求对象
|
||||
* @param contents 请求内容列表
|
||||
*
|
||||
* @param contents 请求内容列表
|
||||
* @param chatModelVo 模型配置信息
|
||||
* @return 构建好的请求对象
|
||||
*/
|
||||
@@ -182,7 +189,8 @@ public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelSer
|
||||
|
||||
/**
|
||||
* 执行 HTTP 请求并解析响应
|
||||
* @param request 请求对象
|
||||
*
|
||||
* @param request 请求对象
|
||||
* @param chatModelVo 模型配置信息
|
||||
* @return API响应对象
|
||||
* @throws IOException IO异常
|
||||
@@ -212,6 +220,7 @@ public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelSer
|
||||
|
||||
/**
|
||||
* 解析嵌入向量列表
|
||||
*
|
||||
* @param responseBody API响应的JSON字符串
|
||||
* @return 嵌入向量响应对象
|
||||
* @throws IOException IO异常
|
||||
@@ -223,6 +232,7 @@ public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelSer
|
||||
|
||||
/**
|
||||
* 构建 API 请求内容 Map
|
||||
*
|
||||
* @param input 多模态输入对象
|
||||
* @return 包含各种模态内容的Map列表
|
||||
*/
|
||||
@@ -247,6 +257,7 @@ public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelSer
|
||||
|
||||
/**
|
||||
* 将 API 原始响应解析为 LangChain4j 的 Response<Embedding>
|
||||
*
|
||||
* @param resp API原始响应对象
|
||||
* @return 包含嵌入向量和token使用情况的Response对象
|
||||
*/
|
||||
@@ -272,7 +283,7 @@ public class AliBaiLianMultiEmbeddingProvider implements MultiModalEmbedModelSer
|
||||
tokenUsage = new TokenUsage(
|
||||
resp.usage().input_tokens(),
|
||||
resp.usage().image_tokens(),
|
||||
resp.usage().input_tokens() +resp.usage().image_tokens()
|
||||
resp.usage().input_tokens() + resp.usage().image_tokens()
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
@@ -25,6 +25,7 @@ public class OllamaEmbeddingProvider implements BaseEmbedModelService {
|
||||
public void configure(ChatModelVo config) {
|
||||
this.chatModelVo = config;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Set<ModalityType> getSupportedModalities() {
|
||||
return Set.of(ModalityType.TEXT);
|
||||
|
||||
@@ -1,12 +1,8 @@
|
||||
package org.ruoyi.embedding.impl;
|
||||
|
||||
|
||||
import org.ruoyi.embedding.BaseEmbedModelService;
|
||||
import org.ruoyi.embedding.model.ModalityType;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* @Author: Robust_H
|
||||
* @Date: 2025-09-30-下午3:59
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
package org.ruoyi.embedding.model;
|
||||
|
||||
import org.ruoyi.common.json.utils.JsonUtils;
|
||||
import lombok.Data;
|
||||
import org.ruoyi.common.json.utils.JsonUtils;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
@@ -16,14 +16,6 @@ public class AliyunMultiModalEmbedRequest {
|
||||
private String model;
|
||||
private Input input;
|
||||
|
||||
/**
|
||||
* 表示输入数据的记录类(Record)
|
||||
* 该类用于封装一个包含多个映射关系列表的输入数据结构
|
||||
*
|
||||
* @param contents 包含多个Map的列表,每个Map中存储String类型的键和Object类型的值
|
||||
*/
|
||||
public record Input(List<Map<String, Object>> contents) { }
|
||||
|
||||
/**
|
||||
* 创建请求对象
|
||||
*/
|
||||
@@ -41,4 +33,13 @@ public class AliyunMultiModalEmbedRequest {
|
||||
public String toJson() {
|
||||
return JsonUtils.toJsonString(this);
|
||||
}
|
||||
|
||||
/**
|
||||
* 表示输入数据的记录类(Record)
|
||||
* 该类用于封装一个包含多个映射关系列表的输入数据结构
|
||||
*
|
||||
* @param contents 包含多个Map的列表,每个Map中存储String类型的键和Object类型的值
|
||||
*/
|
||||
public record Input(List<Map<String, Object>> contents) {
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,6 +18,7 @@ public interface KnowledgeInfoMapper extends BaseMapperPlus<KnowledgeInfo, Knowl
|
||||
|
||||
/**
|
||||
* 根据kid查询知识库
|
||||
*
|
||||
* @param kid 知识库id
|
||||
* @return KnowledgeInfo
|
||||
*/
|
||||
|
||||
@@ -56,7 +56,7 @@ public interface IKnowledgeAttachService {
|
||||
/**
|
||||
* 翻译文件
|
||||
*
|
||||
* @param file 文件
|
||||
* @param file 文件
|
||||
* @param targetLanguage 目标语音
|
||||
*/
|
||||
String translationByFile(MultipartFile file, String targetLanguage);
|
||||
|
||||
@@ -8,6 +8,7 @@ import java.util.List;
|
||||
|
||||
/**
|
||||
* 向量库管理
|
||||
*
|
||||
* @author ageer
|
||||
*/
|
||||
public interface VectorStoreService {
|
||||
@@ -18,7 +19,7 @@ public interface VectorStoreService {
|
||||
|
||||
void createSchema(String kid, String embeddingModelName);
|
||||
|
||||
void removeById(String id,String modelName) throws ServiceException;
|
||||
void removeById(String id, String modelName) throws ServiceException;
|
||||
|
||||
void removeByDocId(String docId, String kid) throws ServiceException;
|
||||
|
||||
|
||||
@@ -32,101 +32,100 @@ import java.util.Map;
|
||||
@Service
|
||||
public class KnowledgeAttachServiceImpl implements IKnowledgeAttachService {
|
||||
|
||||
private final KnowledgeAttachMapper baseMapper;
|
||||
private final KnowledgeFragmentMapper fragmentMapper;
|
||||
private final KnowledgeAttachMapper baseMapper;
|
||||
private final KnowledgeFragmentMapper fragmentMapper;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* 查询知识库附件
|
||||
*/
|
||||
@Override
|
||||
public KnowledgeAttachVo queryById(Long id) {
|
||||
return baseMapper.selectVoById(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* 查询知识库附件列表
|
||||
*/
|
||||
@Override
|
||||
public TableDataInfo<KnowledgeAttachVo> queryPageList(KnowledgeAttachBo bo, PageQuery pageQuery) {
|
||||
LambdaQueryWrapper<KnowledgeAttach> lqw = buildQueryWrapper(bo);
|
||||
Page<KnowledgeAttachVo> result = baseMapper.selectVoPage(pageQuery.build(), lqw);
|
||||
return TableDataInfo.build(result);
|
||||
}
|
||||
|
||||
/**
|
||||
* 查询知识库附件列表
|
||||
*/
|
||||
@Override
|
||||
public List<KnowledgeAttachVo> queryList(KnowledgeAttachBo bo) {
|
||||
LambdaQueryWrapper<KnowledgeAttach> lqw = buildQueryWrapper(bo);
|
||||
return baseMapper.selectVoList(lqw);
|
||||
}
|
||||
|
||||
private LambdaQueryWrapper<KnowledgeAttach> buildQueryWrapper(KnowledgeAttachBo bo) {
|
||||
Map<String, Object> params = bo.getParams();
|
||||
LambdaQueryWrapper<KnowledgeAttach> lqw = Wrappers.lambdaQuery();
|
||||
lqw.eq(StringUtils.isNotBlank(bo.getKid()), KnowledgeAttach::getKid, bo.getKid());
|
||||
lqw.eq(StringUtils.isNotBlank(bo.getDocId()), KnowledgeAttach::getDocId, bo.getDocId());
|
||||
lqw.like(StringUtils.isNotBlank(bo.getDocName()), KnowledgeAttach::getDocName, bo.getDocName());
|
||||
lqw.eq(StringUtils.isNotBlank(bo.getDocType()), KnowledgeAttach::getDocType, bo.getDocType());
|
||||
lqw.eq(StringUtils.isNotBlank(bo.getContent()), KnowledgeAttach::getContent, bo.getContent());
|
||||
return lqw;
|
||||
}
|
||||
|
||||
/**
|
||||
* 新增知识库附件
|
||||
*/
|
||||
@Override
|
||||
public Boolean insertByBo(KnowledgeAttachBo bo) {
|
||||
KnowledgeAttach add = MapstructUtils.convert(bo, KnowledgeAttach.class);
|
||||
validEntityBeforeSave(add);
|
||||
boolean flag = baseMapper.insert(add) > 0;
|
||||
if (flag) {
|
||||
bo.setId(add.getId());
|
||||
/**
|
||||
* 查询知识库附件
|
||||
*/
|
||||
@Override
|
||||
public KnowledgeAttachVo queryById(Long id) {
|
||||
return baseMapper.selectVoById(id);
|
||||
}
|
||||
return flag;
|
||||
}
|
||||
|
||||
/**
|
||||
* 修改知识库附件
|
||||
*/
|
||||
@Override
|
||||
public Boolean updateByBo(KnowledgeAttachBo bo) {
|
||||
KnowledgeAttach update = MapstructUtils.convert(bo, KnowledgeAttach.class);
|
||||
validEntityBeforeSave(update);
|
||||
return baseMapper.updateById(update) > 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* 保存前的数据校验
|
||||
*/
|
||||
private void validEntityBeforeSave(KnowledgeAttach entity) {
|
||||
//TODO 做一些数据校验,如唯一约束
|
||||
}
|
||||
|
||||
/**
|
||||
* 批量删除知识库附件
|
||||
*/
|
||||
@Override
|
||||
public Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid) {
|
||||
if (isValid) {
|
||||
//TODO 做一些业务上的校验,判断是否需要校验
|
||||
/**
|
||||
* 查询知识库附件列表
|
||||
*/
|
||||
@Override
|
||||
public TableDataInfo<KnowledgeAttachVo> queryPageList(KnowledgeAttachBo bo, PageQuery pageQuery) {
|
||||
LambdaQueryWrapper<KnowledgeAttach> lqw = buildQueryWrapper(bo);
|
||||
Page<KnowledgeAttachVo> result = baseMapper.selectVoPage(pageQuery.build(), lqw);
|
||||
return TableDataInfo.build(result);
|
||||
}
|
||||
return baseMapper.deleteBatchIds(ids) > 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void removeKnowledgeAttach(String docId) {
|
||||
Map<String, Object> map = new HashMap<>();
|
||||
map.put("doc_id", docId);
|
||||
baseMapper.deleteByMap(map);
|
||||
fragmentMapper.deleteByMap(map);
|
||||
}
|
||||
/**
|
||||
* 查询知识库附件列表
|
||||
*/
|
||||
@Override
|
||||
public List<KnowledgeAttachVo> queryList(KnowledgeAttachBo bo) {
|
||||
LambdaQueryWrapper<KnowledgeAttach> lqw = buildQueryWrapper(bo);
|
||||
return baseMapper.selectVoList(lqw);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String translationByFile(MultipartFile file, String targetLanguage) {
|
||||
return "接口开发中!";
|
||||
}
|
||||
private LambdaQueryWrapper<KnowledgeAttach> buildQueryWrapper(KnowledgeAttachBo bo) {
|
||||
Map<String, Object> params = bo.getParams();
|
||||
LambdaQueryWrapper<KnowledgeAttach> lqw = Wrappers.lambdaQuery();
|
||||
lqw.eq(StringUtils.isNotBlank(bo.getKid()), KnowledgeAttach::getKid, bo.getKid());
|
||||
lqw.eq(StringUtils.isNotBlank(bo.getDocId()), KnowledgeAttach::getDocId, bo.getDocId());
|
||||
lqw.like(StringUtils.isNotBlank(bo.getDocName()), KnowledgeAttach::getDocName, bo.getDocName());
|
||||
lqw.eq(StringUtils.isNotBlank(bo.getDocType()), KnowledgeAttach::getDocType, bo.getDocType());
|
||||
lqw.eq(StringUtils.isNotBlank(bo.getContent()), KnowledgeAttach::getContent, bo.getContent());
|
||||
return lqw;
|
||||
}
|
||||
|
||||
/**
|
||||
* 新增知识库附件
|
||||
*/
|
||||
@Override
|
||||
public Boolean insertByBo(KnowledgeAttachBo bo) {
|
||||
KnowledgeAttach add = MapstructUtils.convert(bo, KnowledgeAttach.class);
|
||||
validEntityBeforeSave(add);
|
||||
boolean flag = baseMapper.insert(add) > 0;
|
||||
if (flag) {
|
||||
bo.setId(add.getId());
|
||||
}
|
||||
return flag;
|
||||
}
|
||||
|
||||
/**
|
||||
* 修改知识库附件
|
||||
*/
|
||||
@Override
|
||||
public Boolean updateByBo(KnowledgeAttachBo bo) {
|
||||
KnowledgeAttach update = MapstructUtils.convert(bo, KnowledgeAttach.class);
|
||||
validEntityBeforeSave(update);
|
||||
return baseMapper.updateById(update) > 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* 保存前的数据校验
|
||||
*/
|
||||
private void validEntityBeforeSave(KnowledgeAttach entity) {
|
||||
//TODO 做一些数据校验,如唯一约束
|
||||
}
|
||||
|
||||
/**
|
||||
* 批量删除知识库附件
|
||||
*/
|
||||
@Override
|
||||
public Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid) {
|
||||
if (isValid) {
|
||||
//TODO 做一些业务上的校验,判断是否需要校验
|
||||
}
|
||||
return baseMapper.deleteBatchIds(ids) > 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void removeKnowledgeAttach(String docId) {
|
||||
Map<String, Object> map = new HashMap<>();
|
||||
map.put("doc_id", docId);
|
||||
baseMapper.deleteByMap(map);
|
||||
fragmentMapper.deleteByMap(map);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String translationByFile(MultipartFile file, String targetLanguage) {
|
||||
return "接口开发中!";
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,7 +35,7 @@ public class KnowledgeFragmentServiceImpl implements IKnowledgeFragmentService {
|
||||
* 查询知识片段
|
||||
*/
|
||||
@Override
|
||||
public KnowledgeFragmentVo queryById(Long id){
|
||||
public KnowledgeFragmentVo queryById(Long id) {
|
||||
return baseMapper.selectVoById(id);
|
||||
}
|
||||
|
||||
@@ -96,7 +96,7 @@ public class KnowledgeFragmentServiceImpl implements IKnowledgeFragmentService {
|
||||
/**
|
||||
* 保存前的数据校验
|
||||
*/
|
||||
private void validEntityBeforeSave(KnowledgeFragment entity){
|
||||
private void validEntityBeforeSave(KnowledgeFragment entity) {
|
||||
//TODO 做一些数据校验,如唯一约束
|
||||
}
|
||||
|
||||
@@ -105,7 +105,7 @@ public class KnowledgeFragmentServiceImpl implements IKnowledgeFragmentService {
|
||||
*/
|
||||
@Override
|
||||
public Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid) {
|
||||
if(isValid){
|
||||
if (isValid) {
|
||||
//TODO 做一些业务上的校验,判断是否需要校验
|
||||
}
|
||||
return baseMapper.deleteBatchIds(ids) > 0;
|
||||
|
||||
@@ -8,7 +8,8 @@ import org.ruoyi.service.VectorStoreService;
|
||||
import org.ruoyi.service.strategy.VectorStoreStrategyFactory;
|
||||
import org.springframework.context.annotation.Primary;
|
||||
import org.springframework.stereotype.Service;
|
||||
import java.util.*;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* 向量库管理
|
||||
|
||||
@@ -1,16 +1,12 @@
|
||||
package org.ruoyi.service.strategy;
|
||||
|
||||
import org.ruoyi.common.core.exception.ServiceException;
|
||||
import dev.langchain4j.model.embedding.EmbeddingModel;
|
||||
import dev.langchain4j.model.ollama.OllamaEmbeddingModel;
|
||||
import dev.langchain4j.model.openai.OpenAiEmbeddingModel;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.ruoyi.common.core.config.VectorStoreProperties;
|
||||
import org.ruoyi.common.core.utils.StringUtils;
|
||||
import org.ruoyi.service.VectorStoreService;
|
||||
import org.ruoyi.embedding.EmbeddingModelFactory;
|
||||
import org.ruoyi.service.VectorStoreService;
|
||||
|
||||
/**
|
||||
* 向量库策略抽象基类
|
||||
@@ -24,15 +20,7 @@ public abstract class AbstractVectorStoreStrategy implements VectorStoreService
|
||||
|
||||
protected final VectorStoreProperties vectorStoreProperties;
|
||||
|
||||
private final EmbeddingModelFactory embeddingModelFactory;
|
||||
|
||||
/**
|
||||
* 获取向量模型
|
||||
*/
|
||||
@SneakyThrows
|
||||
protected EmbeddingModel getEmbeddingModel(String modelName, Integer dimension) {
|
||||
return embeddingModelFactory.createModel(modelName, dimension);
|
||||
}
|
||||
private final EmbeddingModelFactory embeddingModelFactory;
|
||||
|
||||
/**
|
||||
* 将float数组转换为Float对象数组
|
||||
@@ -45,6 +33,14 @@ public abstract class AbstractVectorStoreStrategy implements VectorStoreService
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取向量模型
|
||||
*/
|
||||
@SneakyThrows
|
||||
protected EmbeddingModel getEmbeddingModel(String modelName, Integer dimension) {
|
||||
return embeddingModelFactory.createModel(modelName, dimension);
|
||||
}
|
||||
|
||||
/**
|
||||
* 获取向量库类型标识
|
||||
*/
|
||||
|
||||
@@ -4,9 +4,9 @@ import jakarta.annotation.PostConstruct;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.ruoyi.common.core.config.VectorStoreProperties;
|
||||
import org.ruoyi.service.VectorStoreService;
|
||||
import org.ruoyi.service.strategy.impl.MilvusVectorStoreStrategy;
|
||||
import org.ruoyi.service.strategy.impl.WeaviateVectorStoreStrategy;
|
||||
import org.ruoyi.service.VectorStoreService;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
package org.ruoyi.service.strategy.impl;
|
||||
|
||||
import cn.hutool.json.JSONObject;
|
||||
import org.ruoyi.common.core.exception.ServiceException;
|
||||
import dev.langchain4j.data.embedding.Embedding;
|
||||
import dev.langchain4j.model.embedding.EmbeddingModel;
|
||||
import io.weaviate.client.Config;
|
||||
@@ -18,12 +17,17 @@ import io.weaviate.client.v1.schema.model.WeaviateClass;
|
||||
import lombok.SneakyThrows;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.ruoyi.common.core.config.VectorStoreProperties;
|
||||
import org.ruoyi.common.core.exception.ServiceException;
|
||||
import org.ruoyi.domain.bo.QueryVectorBo;
|
||||
import org.ruoyi.domain.bo.StoreEmbeddingBo;
|
||||
import org.ruoyi.embedding.EmbeddingModelFactory;
|
||||
import org.ruoyi.service.strategy.AbstractVectorStoreStrategy;
|
||||
import org.springframework.stereotype.Component;
|
||||
import java.util.*;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Weaviate向量库策略实现
|
||||
@@ -85,8 +89,8 @@ public class WeaviateVectorStoreStrategy extends AbstractVectorStoreStrategy {
|
||||
|
||||
@Override
|
||||
public void storeEmbeddings(StoreEmbeddingBo storeEmbeddingBo) {
|
||||
createSchema(storeEmbeddingBo.getKid(),storeEmbeddingBo.getEmbeddingModelName());
|
||||
EmbeddingModel embeddingModel = getEmbeddingModel(storeEmbeddingBo.getEmbeddingModelName(), null);
|
||||
createSchema(storeEmbeddingBo.getKid(), storeEmbeddingBo.getEmbeddingModelName());
|
||||
EmbeddingModel embeddingModel = getEmbeddingModel(storeEmbeddingBo.getEmbeddingModelName(), null);
|
||||
List<String> chunkList = storeEmbeddingBo.getChunkList();
|
||||
List<String> fidList = storeEmbeddingBo.getFids();
|
||||
String kid = storeEmbeddingBo.getKid();
|
||||
@@ -115,11 +119,10 @@ public class WeaviateVectorStoreStrategy extends AbstractVectorStoreStrategy {
|
||||
}
|
||||
|
||||
|
||||
|
||||
@Override
|
||||
public List<String> getQueryVector(QueryVectorBo queryVectorBo) {
|
||||
createSchema(queryVectorBo.getKid(),queryVectorBo.getEmbeddingModelName());
|
||||
EmbeddingModel embeddingModel = getEmbeddingModel(queryVectorBo.getEmbeddingModelName(),null);
|
||||
createSchema(queryVectorBo.getKid(), queryVectorBo.getEmbeddingModelName());
|
||||
EmbeddingModel embeddingModel = getEmbeddingModel(queryVectorBo.getEmbeddingModelName(), null);
|
||||
Embedding queryEmbedding = embeddingModel.embed(queryVectorBo.getQuery()).content();
|
||||
float[] vector = queryEmbedding.vector();
|
||||
List<String> vectorStrings = new ArrayList<>();
|
||||
@@ -128,7 +131,7 @@ public class WeaviateVectorStoreStrategy extends AbstractVectorStoreStrategy {
|
||||
}
|
||||
String vectorStr = String.join(",", vectorStrings);
|
||||
String className = vectorStoreProperties.getWeaviate().getClassname();
|
||||
|
||||
|
||||
// 构建 GraphQL 查询
|
||||
String graphQLQuery = String.format(
|
||||
"{\n" +
|
||||
|
||||
@@ -4,6 +4,8 @@
|
||||
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
|
||||
<mapper namespace="org.ruoyi.mapper.KnowledgeInfoMapper">
|
||||
<select id="selectByKid" resultType="org.ruoyi.domain.KnowledgeInfo">
|
||||
SELECT * FROM knowledge_info WHERE kid = #{kid}
|
||||
SELECT *
|
||||
FROM knowledge_info
|
||||
WHERE kid = #{kid}
|
||||
</select>
|
||||
</mapper>
|
||||
|
||||
Reference in New Issue
Block a user