10 Commits

Author SHA1 Message Date
ageerle
05ae200ff5 Merge pull request #82 from chenwei1015/fix-20250514-knowledge_info
fix- 20250514.sql, alter单词错误
2025-05-14 17:42:21 +08:00
weizhangfz
e745f772ef fix- 20250514.sql, alter单词错误 2025-05-14 17:23:34 +08:00
ageerle
da84a26c47 feat: 更新sql脚本 2025-05-14 13:56:09 +08:00
ageerle
7d3282c347 feat: 修复知识库上传失败 2025-05-14 13:53:49 +08:00
ageerle
4454be44c3 Update README.md 2025-05-13 14:40:27 +08:00
ageerle
c89f5d07fb Merge pull request #80 from janzhou123/main
Extract the image from the PDF and call the large model to identify the image content and return it
2025-05-13 11:08:20 +08:00
zhouweiyi
778a7bc21b 提取PDF中的图片并调用大模型,识别图片内容并返回 2025-05-13 11:01:33 +08:00
zhouweiyi
50f5f38996 提取PDF中的图片并调用大模型,识别图片内容并返回 2025-05-13 10:56:05 +08:00
zhouweiyi
32da85daab 提取PDF中的图片并调用大模型,识别图片内容并返回 2025-05-13 10:55:39 +08:00
Albert
3666157d14 Merge pull request #1 from ageerle/main
new
2025-05-13 09:59:36 +08:00
18 changed files with 609 additions and 191 deletions

View File

@@ -34,6 +34,23 @@
<a href="https://github.com/ageerle/ruoyi-ai/issues">提出新特性</a>
</p>
## 快速启动
1. **克隆项目**
```bash
git clone https://github.com/alanpeng/ruoyi-ai-docker-deploy
cd ruoyi-ai-docker-deploy
```
2. **启动全套应用**
```bash
docker-compose up -d
```
3. **访问应用界面**
- 用户界面:`http://your-server-ip:8081`
- 管理员界面:`http://your-server-ip:8082`
## 目录
- [系统体验](#系统体验)

View File

@@ -94,3 +94,11 @@ sms:
# 腾讯专用
sdkAppId:
pdf:
extract:
service:
url: http://localhost:8080
ai-api:
url: https://api.pandarobot.chat/v1/chat/completions
key: sk-xxxx

View File

@@ -172,3 +172,11 @@ sms:
signName: 测试
# 腾讯专用
sdkAppId:
pdf:
extract:
service:
url: http://localhost:8080
ai-api:
url: https://api.pandarobot.chat/v1/chat/completions
key: sk-XXXXXX

View File

@@ -1,5 +1,6 @@
package org.ruoyi.domain;
import com.alibaba.excel.annotation.ExcelProperty;
import com.baomidou.mybatisplus.annotation.*;
import lombok.Data;
import lombok.EqualsAndHashCode;
@@ -78,14 +79,19 @@ public class KnowledgeInfo extends BaseEntity {
private Long textBlockSize;
/**
* 向量库
* 向量库模型名称
*/
private String vector;
private String vectorModelName;
/**
* 向量模型
* 向量模型名称
*/
private String vectorModel;
private String embeddingModelName;
/**
* 系统提示词
*/
private String systemPrompt;
/**
* 备注

View File

@@ -0,0 +1,30 @@
package org.ruoyi.domain;
/**
 * Simple mutable holder pairing a file label with the content obtained for that file.
 */
public class PdfFileContentResult {

    /** Label identifying the file this result belongs to. */
    private String filename;

    /** Content associated with the file. */
    private String content;

    /**
     * Creates a result with both values set.
     *
     * @param filename label identifying the file
     * @param content  content associated with the file
     */
    public PdfFileContentResult(final String filename, final String content) {
        this.content = content;
        this.filename = filename;
    }

    /** @return the label identifying the file */
    public String getFilename() {
        return this.filename;
    }

    /** @return the content associated with the file */
    public String getContent() {
        return this.content;
    }

    /** @param filename new label identifying the file */
    public void setFilename(final String filename) {
        this.filename = filename;
    }

    /** @param content new content associated with the file */
    public void setContent(final String content) {
        this.content = content;
    }
}

View File

@@ -83,16 +83,22 @@ public class KnowledgeInfoBo extends BaseEntity {
private Long textBlockSize;
/**
* 向量库
* 向量库模型名称
*/
@NotBlank(message = "向量库不能为空", groups = { AddGroup.class, EditGroup.class })
private String vector;
private String vectorModelName;
/**
* 向量模型
* 向量模型名称
*/
@NotBlank(message = "向量模型不能为空", groups = { AddGroup.class, EditGroup.class })
private String vectorModel;
private String embeddingModelName;
/**
* 系统提示词
*/
private String systemPrompt;
/**
* 备注

View File

@@ -26,9 +26,14 @@ public class QueryVectorBo {
private Integer maxResults;
/**
* 模型名称
* 向量库模型名称
*/
private String modelName;
private String vectorModelName;
/**
* 向量化模型名称
*/
private String embeddingModelName;
/**
* 请求key

View File

@@ -32,9 +32,14 @@ public class StoreEmbeddingBo {
private List<String> fids;
/**
* 模型名称
* 向量库模型名称
*/
private String modelName;
private String vectorModelName;
/**
* 向量化模型名称
*/
private String embeddingModelName;
/**
* 请求key

View File

@@ -98,16 +98,20 @@ public class KnowledgeInfoVo implements Serializable {
private Integer textBlockSize;
/**
* 向量库
* 向量库模型名称
*/
@ExcelProperty(value = "向量库")
private String vector;
private String vectorModelName;
/**
* 向量模型
* 向量模型名称
*/
@ExcelProperty(value = "向量模型")
private String vectorModel;
private String embeddingModelName;
/**
* 系统提示词
*/
private String systemPrompt;
/**
* 备注

View File

@@ -0,0 +1,41 @@
package org.ruoyi.service;
import java.io.IOException;
import java.util.List;
import org.ruoyi.domain.PdfFileContentResult;
import org.springframework.web.multipart.MultipartFile;
/**
 * Service interface for extracting the images embedded in a PDF and obtaining
 * a description of their content.
 */
public interface PdfImageExtractService {
/**
 * Extracts the images contained in a PDF file.
 *
 * @param pdfFile the PDF file
 * @param imageFormat output image format (png, jpeg, gif)
 * @param allowDuplicates whether duplicate images are allowed
 * @return byte array of a ZIP file containing the extracted images
 * @throws IOException if an error occurs while processing the file
 */
byte[] extractImages(MultipartFile pdfFile, String imageFormat, boolean allowDuplicates)
throws IOException;
/**
 * Processes file content.
 *
 * @param unzip array of Base64-encoded images
 * @return list of file content results
 * @throws IOException if an error occurs during the API call
 */
List<PdfFileContentResult> dealFileContent(String[] unzip) throws IOException;
/**
 * Extracts the images from a PDF, calls a large model to recognise the content
 * of each image, and returns the results.
 *
 * <p>NOTE(review): the original comment named gpt-4o-mini as the model; the
 * model actually used is decided by the implementation — confirm.
 *
 * @param file the PDF file
 * @return list of file content results
 * @throws IOException if an error occurs while processing the file
 */
List<PdfFileContentResult> extractImages(MultipartFile file) throws IOException;
}

View File

@@ -13,14 +13,14 @@ public interface VectorStoreService {
void storeEmbeddings(StoreEmbeddingBo storeEmbeddingBo);
void removeByDocId(String kid,String docId);
void removeByKid(String kid);
List<String> getQueryVector(QueryVectorBo queryVectorBo);
void createSchema(String kid,String modelName);
void removeByKidAndFid(String kid, String fid);
void removeByKid(String kid,String modelName);
void removeByDocId(String kid,String docId,String modelName);
void removeByKidAndFid(String kid, String fid,String modelName);
}

View File

@@ -0,0 +1,144 @@
package org.ruoyi.service.impl;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import lombok.extern.slf4j.Slf4j;
import okhttp3.MediaType;
import okhttp3.MultipartBody;
import okhttp3.OkHttpClient;
import okhttp3.OkHttpClient.Builder;
import okhttp3.Request;
import okhttp3.RequestBody;
import okhttp3.Response;
import org.ruoyi.common.core.domain.R;
import org.ruoyi.domain.PdfFileContentResult;
import org.ruoyi.service.PdfImageExtractService;
import org.ruoyi.utils.ZipUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
/**
* PDF图片提取服务实现类
*/
@Service
@Slf4j
public class PdfImageExtractServiceImpl implements PdfImageExtractService {
@Value("${pdf.extract.service.url}")
private String serviceUrl;
@Value("${pdf.extract.ai-api.url}")
private String aiApiUrl;
@Value("${pdf.extract.ai-api.key}")
private String aiApiKey ;
private final OkHttpClient client = new Builder()
.connectTimeout(100, TimeUnit.SECONDS)
.readTimeout(150, TimeUnit.SECONDS)
.writeTimeout(150, TimeUnit.SECONDS)
.callTimeout(300, TimeUnit.SECONDS)
.build();
private static final MediaType JSON = MediaType.parse("application/json; charset=utf-8");
@Override
public byte[] extractImages(MultipartFile pdfFile, String imageFormat, boolean allowDuplicates)
throws IOException {
// 构建multipart请求
RequestBody requestBody = new MultipartBody.Builder()
.setType(MultipartBody.FORM)
.addFormDataPart("fileInput", pdfFile.getOriginalFilename(),
RequestBody.create(MediaType.parse("application/pdf"), pdfFile.getBytes()))
.addFormDataPart("format", imageFormat)
.addFormDataPart("allowDuplicates", String.valueOf(allowDuplicates))
.build();
// 创建请求
Request request = new Request.Builder()
.url(serviceUrl + "/api/v1/misc/extract-images")
.post(requestBody)
.build();
// 执行请求
try (Response response = client.newCall(request).execute()) {
if (!response.isSuccessful()) {
throw new IOException("请求失败: " + response.code());
}
return response.body().bytes();
}
}
/**
* 处理文件内容
*
* @param unzip Base64编码的图片数组
* @return 文件内容结果列表
* @throws IOException 如果API调用过程中发生错误
*/
@Override
public List<PdfFileContentResult> dealFileContent(String[] unzip) throws IOException {
List<PdfFileContentResult> results = new ArrayList<>();
int i = 0;
for (String base64Image : unzip) {
// 构建请求JSON
String requestJson = String.format("{"
+ "\"model\": \"gpt-4o\","
+ "\"stream\": false,"
+ "\"messages\": [{"
+ "\"role\": \"user\","
+ "\"content\": [{"
+ "\"type\": \"text\","
+ "\"text\": \"这张图片有什么\""
+ "}, {"
+ "\"type\": \"image_url\","
+ "\"image_url\": {"
+ "\"url\": \"%s\""
+ "}}"
+ "]}],"
+ "\"max_tokens\": 400"
+ "}", base64Image);
// 创建请求
Request request = new Request.Builder()
.url(aiApiUrl)
.addHeader("Authorization", "Bearer " + aiApiKey)
.post(RequestBody.create(JSON, requestJson))
.build();
// 执行请求
try {
log.info("=============call=" + ++i);
Response response = client.newCall(request).execute();
log.info("=============response=" + response);
if (!response.isSuccessful()) {
throw new IOException("API请求失败: " + response.code() + response.toString());
}
String responseBody = response.body().string();
log.info("=============responseBody=" + responseBody);
// 使用文件名这里使用base64的前10个字符作为标识和API返回内容创建结果对象
String filename = base64Image.substring(0, Math.min(base64Image.length(), 10));
results.add(new PdfFileContentResult(filename, responseBody));
} catch (Exception e) {
log.error(e.getMessage());
throw new RuntimeException(e);
}
}
return results;
}
@Override
public List<PdfFileContentResult> extractImages(MultipartFile file) throws IOException {
String format = "png";
boolean allowDuplicates = true;
// 获取ZIP数据
byte[] zipData = this.extractImages(file, format, allowDuplicates);
// 解压文件并识别图片内容并返回
String[] unzip = ZipUtils.unzipForBase64(zipData);
//解析图片内容
return this.dealFileContent(unzip);
}
}

View File

@@ -1,5 +1,7 @@
package org.ruoyi.service.impl;
import cn.hutool.core.util.RandomUtil;
import com.google.protobuf.ServiceException;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.embedding.EmbeddingModel;
@@ -16,6 +18,7 @@ import dev.langchain4j.store.embedding.milvus.MilvusEmbeddingStore;
import dev.langchain4j.store.embedding.qdrant.QdrantEmbeddingStore;
import dev.langchain4j.store.embedding.weaviate.WeaviateEmbeddingStore;
import lombok.RequiredArgsConstructor;
import lombok.SneakyThrows;
import lombok.extern.slf4j.Slf4j;
import org.ruoyi.common.core.service.ConfigService;
import org.ruoyi.domain.bo.QueryVectorBo;
@@ -40,11 +43,10 @@ public class VectorStoreServiceImpl implements VectorStoreService {
private final ConfigService configService;
Map<String,EmbeddingStore<TextSegment>> storeMap = new HashMap<>();
private EmbeddingStore<TextSegment> embeddingStore;
@Override
public void createSchema(String kid,String modelName) {
EmbeddingStore<TextSegment> embeddingStore;
switch (modelName) {
case "weaviate" -> {
String protocol = configService.getConfigValue("weaviate", "protocol");
@@ -84,88 +86,83 @@ public class VectorStoreServiceImpl implements VectorStoreService {
embeddingStore = new InMemoryEmbeddingStore<>();
}
}
storeMap.put(kid,embeddingStore);
}
@Override
public void storeEmbeddings(StoreEmbeddingBo storeEmbeddingBo) {
EmbeddingStore<TextSegment> store = storeMap.get(storeEmbeddingBo.getKid());
EmbeddingModel embeddingModel = getEmbeddingModel(storeEmbeddingBo.getModelName(),
createSchema(storeEmbeddingBo.getKid(),storeEmbeddingBo.getVectorModelName());
EmbeddingModel embeddingModel = getEmbeddingModel(storeEmbeddingBo.getEmbeddingModelName(),
storeEmbeddingBo.getApiKey(), storeEmbeddingBo.getBaseUrl());
for (int i = 0; i < storeEmbeddingBo.getChunkList().size(); i++) {
List<String> chunkList = storeEmbeddingBo.getChunkList();
for (int i = 0; i < chunkList.size(); i++) {
Map<String, Object> dataSchema = new HashMap<>();
dataSchema.put("kid", storeEmbeddingBo.getKid());
dataSchema.put("docId", storeEmbeddingBo.getKid());
dataSchema.put("fid", storeEmbeddingBo.getFids().get(i));
Response<Embedding> response = embeddingModel.embed(storeEmbeddingBo.getChunkList().get(i));
Embedding embedding = response.content();
TextSegment segment = TextSegment.from(storeEmbeddingBo.getChunkList().get(i));
Embedding embedding = embeddingModel.embed(chunkList.get(i)).content();
TextSegment segment = TextSegment.from(chunkList.get(i));
segment.metadata().putAll(dataSchema);
store.add(embedding,segment);
embeddingStore.add(embedding,segment);
}
}
@Override
public List<String> getQueryVector(QueryVectorBo queryVectorBo) {
EmbeddingStore<TextSegment> store = storeMap.get(queryVectorBo.getKid());
EmbeddingModel embeddingModel = getEmbeddingModel(queryVectorBo.getModelName(),
createSchema(queryVectorBo.getKid(),queryVectorBo.getVectorModelName());
EmbeddingModel embeddingModel = getEmbeddingModel(queryVectorBo.getEmbeddingModelName(),
queryVectorBo.getApiKey(), queryVectorBo.getBaseUrl());
Filter simpleFilter = new IsEqualTo("kid", queryVectorBo.getKid());
// Filter simpleFilter = new IsEqualTo("kid", queryVectorBo.getKid());
Embedding queryEmbedding = embeddingModel.embed(queryVectorBo.getQuery()).content();
EmbeddingSearchRequest embeddingSearchRequest = EmbeddingSearchRequest.builder()
.queryEmbedding(queryEmbedding)
.maxResults(queryVectorBo.getMaxResults())
// 添加过滤条件
.filter(simpleFilter)
// .filter(simpleFilter)
.build();
List<EmbeddingMatch<TextSegment>> matches = store.search(embeddingSearchRequest).matches();
List<EmbeddingMatch<TextSegment>> matches = embeddingStore.search(embeddingSearchRequest).matches();
List<String> results = new ArrayList<>();
matches.forEach(embeddingMatch -> results.add(embeddingMatch.embedded().text()));
return results;
}
@Override
public void removeByKid(String kid) {
EmbeddingStore<TextSegment> store = storeMap.get(kid);
public void removeByKid(String kid,String modelName) {
createSchema(kid,modelName);
// 根据条件删除向量数据
Filter simpleFilter = new IsEqualTo("kid", kid);
store.removeAll(simpleFilter);
embeddingStore.removeAll(simpleFilter);
}
@Override
public void removeByDocId(String kid, String docId) {
EmbeddingStore<TextSegment> store = storeMap.get(kid);
public void removeByDocId(String kid, String docId,String modelName) {
createSchema(kid,modelName);
// 根据条件删除向量数据
Filter simpleFilterByDocId = new IsEqualTo("docId", docId);
store.removeAll(simpleFilterByDocId);
embeddingStore.removeAll(simpleFilterByDocId);
}
@Override
public void removeByKidAndFid(String kid, String fid) {
EmbeddingStore<TextSegment> store = storeMap.get(kid);
public void removeByKidAndFid(String kid, String fid,String modelName) {
createSchema(kid,modelName);
// 根据条件删除向量数据
Filter simpleFilterByKid = new IsEqualTo("kid", kid);
Filter simpleFilterFid = new IsEqualTo("fid", fid);
Filter simpleFilterByAnd = Filter.and(simpleFilterFid, simpleFilterByKid);
store.removeAll(simpleFilterByAnd);
embeddingStore.removeAll(simpleFilterByAnd);
}
/**
* 获取向量模型
*/
public EmbeddingModel getEmbeddingModel(String modelName,String apiKey,String baseUrl) {
EmbeddingModel embeddingModel = OpenAiEmbeddingModel.builder().build();
@SneakyThrows
public EmbeddingModel getEmbeddingModel(String modelName, String apiKey, String baseUrl) {
EmbeddingModel embeddingModel;
if(TEXT_EMBEDDING_3_SMALL.toString().equals(modelName)) {
embeddingModel = OpenAiEmbeddingModel.builder()
.apiKey(apiKey)
.baseUrl(baseUrl)
.modelName(TEXT_EMBEDDING_3_SMALL)
.modelName(modelName)
.build();
// TODO 添加枚举
}else if("quentinz/bge-large-zh-v1.5".equals(modelName)) {
@@ -173,6 +170,14 @@ public class VectorStoreServiceImpl implements VectorStoreService {
.baseUrl(baseUrl)
.modelName(modelName)
.build();
}else if("baai/bge-m3".equals(modelName)) {
embeddingModel = OpenAiEmbeddingModel.builder()
.apiKey(apiKey)
.baseUrl(baseUrl)
.modelName(modelName)
.build();
}else {
throw new ServiceException("未找到对应向量化模型!");
}
return embeddingModel;
}

View File

@@ -0,0 +1,95 @@
package org.ruoyi.utils;
import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
/**
 * Utility methods for unpacking ZIP archives held in memory.
 */
public class ZipUtils {

    /**
     * Extracts a ZIP archive into a directory.
     *
     * <p>Every entry is checked to resolve inside {@code destDir}; an entry whose name would
     * place it elsewhere (for example via {@code ../}) is rejected, so a crafted archive
     * cannot overwrite arbitrary files. Missing parent directories of file entries are
     * created even when the archive carries no explicit directory entries.
     *
     * @param zipData bytes of the ZIP archive
     * @param destDir destination directory; created if it does not exist
     * @return paths of the extracted files, each formed as
     *         {@code destDir + File.separator + entryName}
     * @throws IOException if a directory cannot be created, an entry resolves outside
     *                     {@code destDir}, or reading/writing fails
     */
    public static String[] unzip(byte[] zipData, String destDir) throws IOException {
        File destDirFile = new File(destDir);
        ensureDirectory(destDirFile);
        String canonicalDest = destDirFile.getCanonicalPath();

        List<String> extractedPaths = new ArrayList<>();
        try (ByteArrayInputStream bis = new ByteArrayInputStream(zipData);
             ZipInputStream zis = new ZipInputStream(bis)) {
            ZipEntry zipEntry;
            while ((zipEntry = zis.getNextEntry()) != null) {
                String filePath = destDir + File.separator + zipEntry.getName();
                File target = new File(filePath);
                requireInside(canonicalDest, target, zipEntry.getName());

                if (zipEntry.isDirectory()) {
                    ensureDirectory(target);
                } else {
                    File parent = target.getParentFile();
                    if (parent != null) {
                        ensureDirectory(parent);
                    }
                    extractFile(zis, filePath);
                    extractedPaths.add(filePath);
                }
                zis.closeEntry();
            }
        }
        return extractedPaths.toArray(new String[0]);
    }

    /** Creates {@code dir} (and parents) unless it already exists; fails loudly otherwise. */
    private static void ensureDirectory(File dir) throws IOException {
        // Re-check isDirectory() after mkdirs() so a concurrent creation is not an error.
        if (!dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("Unable to create directory: " + dir);
        }
    }

    /** Rejects a target whose canonical path is not the destination itself or below it. */
    private static void requireInside(String canonicalDest, File target, String entryName)
        throws IOException {
        String resolved = target.getCanonicalPath();
        String prefix = canonicalDest.endsWith(File.separator)
            ? canonicalDest
            : canonicalDest + File.separator;
        if (!resolved.equals(canonicalDest) && !resolved.startsWith(prefix)) {
            throw new IOException("ZIP entry escapes the destination directory: " + entryName);
        }
    }

    /** Writes the remainder of the current ZIP entry to {@code filePath}. */
    private static void extractFile(ZipInputStream zis, String filePath) throws IOException {
        try (BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(filePath))) {
            zis.transferTo(bos);
        }
    }

    /**
     * Unpacks a ZIP archive entirely in memory and Base64-encodes each file entry.
     *
     * @param zipData bytes of the ZIP archive
     * @return Base64 encoding of every non-directory entry, in archive order
     * @throws IOException if the archive cannot be read
     */
    public static String[] unzipForBase64(byte[] zipData) throws IOException {
        List<String> base64Contents = new ArrayList<>();
        try (ByteArrayInputStream bis = new ByteArrayInputStream(zipData);
             ZipInputStream zis = new ZipInputStream(bis)) {
            ZipEntry zipEntry;
            while ((zipEntry = zis.getNextEntry()) != null) {
                if (!zipEntry.isDirectory()) {
                    // readAllBytes() stops at the end of the current entry.
                    base64Contents.add(Base64.getEncoder().encodeToString(zis.readAllBytes()));
                }
                zis.closeEntry();
            }
        }
        return base64Contents.toArray(new String[0]);
    }
}

View File

@@ -1,9 +1,12 @@
package org.ruoyi.chat.controller.knowledge;
import cn.dev33.satoken.stp.StpUtil;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import jakarta.servlet.http.HttpServletResponse;
import jakarta.validation.constraints.NotEmpty;
import jakarta.validation.constraints.NotNull;
import java.io.IOException;
import lombok.RequiredArgsConstructor;
import org.ruoyi.common.core.domain.R;
import org.ruoyi.common.core.validate.AddGroup;
@@ -14,6 +17,7 @@ import org.ruoyi.common.satoken.utils.LoginHelper;
import org.ruoyi.common.web.core.BaseController;
import org.ruoyi.core.page.PageQuery;
import org.ruoyi.core.page.TableDataInfo;
import org.ruoyi.domain.PdfFileContentResult;
import org.ruoyi.domain.bo.KnowledgeAttachBo;
import org.ruoyi.domain.bo.KnowledgeFragmentBo;
import org.ruoyi.domain.bo.KnowledgeInfoBo;
@@ -24,6 +28,7 @@ import org.ruoyi.domain.vo.KnowledgeInfoVo;
import org.ruoyi.service.IKnowledgeAttachService;
import org.ruoyi.service.IKnowledgeFragmentService;
import org.ruoyi.service.IKnowledgeInfoService;
import org.ruoyi.service.PdfImageExtractService;
import org.springframework.validation.annotation.Validated;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
@@ -41,117 +46,135 @@ import java.util.List;
@RequestMapping("/knowledge")
public class KnowledgeController extends BaseController {
private final IKnowledgeInfoService knowledgeInfoService;
private final IKnowledgeInfoService knowledgeInfoService;
private final IKnowledgeAttachService attachService;
private final IKnowledgeAttachService attachService;
private final IKnowledgeFragmentService fragmentService;
private final IKnowledgeFragmentService fragmentService;
/**
* 根据用户信息查询本地知识库
*/
@GetMapping("/list")
public TableDataInfo<KnowledgeInfoVo> list(KnowledgeInfoBo bo, PageQuery pageQuery) {
if (!StpUtil.isLogin()) {
throw new SecurityException("请先去登录!");
}
bo.setUid(LoginHelper.getUserId());
return knowledgeInfoService.queryPageList(bo, pageQuery);
private final PdfImageExtractService pdfImageExtractService;
/**
* 根据用户信息查询本地知识库
*/
@GetMapping("/list")
public TableDataInfo<KnowledgeInfoVo> list(KnowledgeInfoBo bo, PageQuery pageQuery) {
if (!StpUtil.isLogin()) {
throw new SecurityException("请先去登录!");
}
bo.setUid(LoginHelper.getUserId());
return knowledgeInfoService.queryPageList(bo, pageQuery);
}
/**
* 新增知识库
*/
@Log(title = "知识库", businessType = BusinessType.INSERT)
@PostMapping("/save")
public R<Void> save(@Validated(AddGroup.class) @RequestBody KnowledgeInfoBo bo) {
knowledgeInfoService.saveOne(bo);
return R.ok();
}
/**
* 新增知识库
*/
@Log(title = "知识库", businessType = BusinessType.INSERT)
@PostMapping("/save")
public R<Void> save(@Validated(AddGroup.class) @RequestBody KnowledgeInfoBo bo) {
knowledgeInfoService.saveOne(bo);
return R.ok();
}
/**
* 删除知识库
*/
@PostMapping("/remove/{id}")
public R<String> remove(@PathVariable String id) {
knowledgeInfoService.removeKnowledge(id);
return R.ok("删除知识库成功!");
}
/**
* 删除知识库
*/
@PostMapping("/remove/{id}")
public R<String> remove(@PathVariable String id) {
knowledgeInfoService.removeKnowledge(id);
return R.ok("删除知识库成功!");
}
/**
* 修改知识库
*/
@Log(title = "知识库", businessType = BusinessType.UPDATE)
@PostMapping("/edit")
public R<Void> edit(@RequestBody KnowledgeInfoBo bo) {
return toAjax(knowledgeInfoService.updateByBo(bo));
}
/**
* 修改知识库
*/
@Log(title = "知识库", businessType = BusinessType.UPDATE)
@PostMapping("/edit")
public R<Void> edit(@RequestBody KnowledgeInfoBo bo) {
return toAjax(knowledgeInfoService.updateByBo(bo));
}
/**
* 导出知识库列表
*/
@Log(title = "知识库", businessType = BusinessType.EXPORT)
@PostMapping("/export")
public void export(KnowledgeInfoBo bo, HttpServletResponse response) {
List<KnowledgeInfoVo> list = knowledgeInfoService.queryList(bo);
ExcelUtil.exportExcel(list, "知识库", KnowledgeInfoVo.class, response);
}
/**
* 导出知识库列表
*/
@Log(title = "知识库", businessType = BusinessType.EXPORT)
@PostMapping("/export")
public void export(KnowledgeInfoBo bo, HttpServletResponse response) {
List<KnowledgeInfoVo> list = knowledgeInfoService.queryList(bo);
ExcelUtil.exportExcel(list, "知识库", KnowledgeInfoVo.class, response);
}
/**
* 查询知识附件信息
*/
@GetMapping("/detail/{kid}")
public TableDataInfo<KnowledgeAttachVo> attach(KnowledgeAttachBo bo, PageQuery pageQuery, @PathVariable String kid) {
bo.setKid(kid);
return attachService.queryPageList(bo, pageQuery);
}
/**
* 查询知识附件信息
*/
@GetMapping("/detail/{kid}")
public TableDataInfo<KnowledgeAttachVo> attach(KnowledgeAttachBo bo, PageQuery pageQuery,
@PathVariable String kid) {
bo.setKid(kid);
return attachService.queryPageList(bo, pageQuery);
}
/**
* 上传知识库附件
*/
@PostMapping(value = "/attach/upload")
public R<String> upload(KnowledgeInfoUploadBo bo) {
knowledgeInfoService.upload(bo);
return R.ok("上传知识库附件成功!");
}
/**
* 上传知识库附件
*/
@PostMapping(value = "/attach/upload")
public R<String> upload(KnowledgeInfoUploadBo bo) {
knowledgeInfoService.upload(bo);
return R.ok("上传知识库附件成功!");
}
/**
* 获取知识库附件详细信息
*
* @param id 主键
*/
@GetMapping("attach/info/{id}")
public R<KnowledgeAttachVo> getAttachInfo(@NotNull(message = "主键不能为空")
@PathVariable Long id) {
return R.ok(attachService.queryById(id));
}
/**
* 获取知识库附件详细信息
*
* @param id 主键
*/
@GetMapping("attach/info/{id}")
public R<KnowledgeAttachVo> getAttachInfo(@NotNull(message = "主键不能为空")
@PathVariable Long id) {
return R.ok(attachService.queryById(id));
}
/**
* 删除知识库附件
*/
@PostMapping("attach/remove/{kid}")
public R<Void> removeAttach(@NotEmpty(message = "主键不能为空")
@PathVariable String kid) {
attachService.removeKnowledgeAttach(kid);
return R.ok();
}
/**
* 删除知识库附件
*/
@PostMapping("attach/remove/{kid}")
public R<Void> removeAttach(@NotEmpty(message = "主键不能为空")
@PathVariable String kid) {
attachService.removeKnowledgeAttach(kid);
return R.ok();
}
/**
* 查询知识片段
*/
@GetMapping("/fragment/list/{docId}")
public TableDataInfo<KnowledgeFragmentVo> fragmentList(KnowledgeFragmentBo bo, PageQuery pageQuery, @PathVariable String docId) {
bo.setDocId(docId);
return fragmentService.queryPageList(bo, pageQuery);
}
/**
* 查询知识片段
*/
@GetMapping("/fragment/list/{docId}")
public TableDataInfo<KnowledgeFragmentVo> fragmentList(KnowledgeFragmentBo bo,
PageQuery pageQuery, @PathVariable String docId) {
bo.setDocId(docId);
return fragmentService.queryPageList(bo, pageQuery);
}
/**
* 上传文件翻译
*/
@PostMapping("/translationByFile")
@ResponseBody
public String translationByFile(@RequestParam("file") MultipartFile file, String targetLanguage) {
return attachService.translationByFile(file, targetLanguage);
}
/**
* 上传文件翻译
*/
@PostMapping("/translationByFile")
@ResponseBody
public String translationByFile(@RequestParam("file") MultipartFile file, String targetLanguage) {
return attachService.translationByFile(file, targetLanguage);
}
/**
 * Extracts the images from an uploaded PDF, has a large model recognise the content of
 * each image, and returns the recognition results.
 *
 * @param file the PDF file, received as the multipart part named {@code file}
 * @return the recognition results wrapped in a success response
 * @throws IOException propagated from the extraction service
 */
@PostMapping("/extract-images")
@Operation(summary = "提取PDF中的图片并调用大模型,识别图片内容并返回", description = "提取PDF中的图片并调用gpt-4o-mini,识别图片内容并返回")
public R<List<PdfFileContentResult>> extractImages(@RequestPart("file") MultipartFile file)
    throws IOException {
    List<PdfFileContentResult> recognised = pdfImageExtractService.extractImages(file);
    return R.ok(recognised);
}
}

View File

@@ -2,6 +2,7 @@ package org.ruoyi.chat.service.chat.impl;
import cn.dev33.satoken.stp.StpUtil;
import cn.hutool.core.collection.CollectionUtil;
import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import com.google.protobuf.ServiceException;
import jakarta.servlet.http.HttpServletRequest;
import lombok.RequiredArgsConstructor;
@@ -29,6 +30,8 @@ import org.ruoyi.common.redis.utils.RedisUtils;
import org.ruoyi.domain.bo.ChatSessionBo;
import org.ruoyi.domain.bo.QueryVectorBo;
import org.ruoyi.domain.vo.ChatModelVo;
import org.ruoyi.domain.vo.KnowledgeInfoVo;
import org.ruoyi.service.IKnowledgeInfoService;
import org.ruoyi.service.VectorStoreService;
import org.ruoyi.service.IChatModelService;
import org.ruoyi.service.IChatSessionService;
@@ -67,6 +70,8 @@ public class SseServiceImpl implements ISseService {
private final IChatSessionService chatSessionService;
private final IKnowledgeInfoService knowledgeInfoService;
private ChatModelVo chatModelVo;
@@ -148,50 +153,61 @@ public class SseServiceImpl implements ISseService {
}
}
/**
* 构建消息列表
*/
private void buildChatMessageList(ChatRequest chatRequest){
chatModelVo = chatModelService.selectModelByName(chatRequest.getModel());
String sysPrompt;
chatModelVo = chatModelService.selectModelByName(chatRequest.getModel());
// 获取对话消息列表
List<Message> messages = chatRequest.getMessages();
String sysPrompt = chatModelVo.getSystemPrompt();
// 查询向量库相关信息加入到上下文
if(StringUtils.isNotEmpty(chatRequest.getKid())){
List<Message> knMessages = new ArrayList<>();
String content = messages.get(messages.size() - 1).getContent().toString();
// 通过kid查询知识库信息
KnowledgeInfoVo knowledgeInfoVo = knowledgeInfoService.queryById(Long.valueOf(chatRequest.getKid()));
// 查询向量模型配置信息
ChatModelVo chatModel = chatModelService.selectModelByName(knowledgeInfoVo.getEmbeddingModelName());
if(StringUtils.isEmpty(sysPrompt)){
// TODO 系统默认提示词,后续会增加提示词管理
sysPrompt ="你是一个由RuoYI-AI开发的人工智能助手名字叫熊猫助手。你擅长中英文对话能够理解并处理各种问题提供安全、有帮助、准确的回答。" +
"当前时间:"+ DateUtils.getDate()+
"#注意:回复之前注意结合上下文和工具返回内容进行回复。";
QueryVectorBo queryVectorBo = new QueryVectorBo();
queryVectorBo.setQuery(content);
queryVectorBo.setKid(chatRequest.getKid());
queryVectorBo.setApiKey(chatModel.getApiKey());
queryVectorBo.setBaseUrl(chatModel.getApiHost());
queryVectorBo.setVectorModelName(knowledgeInfoVo.getVectorModelName());
queryVectorBo.setEmbeddingModelName(knowledgeInfoVo.getEmbeddingModelName());
queryVectorBo.setMaxResults(knowledgeInfoVo.getRetrieveLimit());
List<String> nearestList = vectorStoreService.getQueryVector(queryVectorBo);
for (String prompt : nearestList) {
Message userMessage = Message.builder().content(prompt).role(Message.Role.USER).build();
knMessages.add(userMessage);
}
messages.addAll(knMessages);
// 设置知识库系统提示词
sysPrompt = knowledgeInfoVo.getSystemPrompt();
if(StringUtils.isEmpty(sysPrompt)){
sysPrompt ="###角色设定\n" +
"你是一个智能知识助手,专注于利用上下文中的信息来提供准确和相关的回答。\n" +
"###指令\n" +
"当用户的问题与上下文知识匹配时,利用上下文信息进行回答。如果问题与上下文不匹配,运用自身的推理能力生成合适的回答。\n" +
"###限制\n" +
"确保回答清晰简洁,避免提供不必要的细节。始终保持语气友好" +
"当前时间:"+ DateUtils.getDate();
}
}else {
sysPrompt = chatModelVo.getSystemPrompt();
if(StringUtils.isEmpty(sysPrompt)){
sysPrompt ="你是一个由RuoYI-AI开发的人工智能助手名字叫熊猫助手。你擅长中英文对话能够理解并处理各种问题提供安全、有帮助、准确的回答。" +
"当前时间:"+ DateUtils.getDate()+
"#注意:回复之前注意结合上下文和工具返回内容进行回复。";
}
}
// 设置系统默认提示词
Message sysMessage = Message.builder().content(sysPrompt).role(Message.Role.SYSTEM).build();
messages.add(0,sysMessage);
chatRequest.setSysPrompt(sysPrompt);
// 查询向量库相关信息加入到上下文
if(StringUtils.isNotEmpty(chatRequest.getKid())){
List<Message> knMessages = new ArrayList<>();
String content = messages.get(messages.size() - 1).getContent().toString();
QueryVectorBo queryVectorBo = new QueryVectorBo();
queryVectorBo.setQuery(content);
queryVectorBo.setKid(chatRequest.getKid());
queryVectorBo.setApiKey(chatModelVo.getApiKey());
queryVectorBo.setBaseUrl(chatModelVo.getApiHost());
queryVectorBo.setModelName(chatModelVo.getModelName());
// TODO 查询向量返回条数,这里应该查询知识库配置
queryVectorBo.setMaxResults(3);
List<String> nearestList = vectorStoreService.getQueryVector(queryVectorBo);
for (String prompt : nearestList) {
Message userMessage = Message.builder().content(prompt).role(Message.Role.USER).build();
knMessages.add(userMessage);
}
// TODO 提示词,这里应该查询知识库配置
Message userMessage = Message.builder().content(content + (!nearestList.isEmpty() ? "\n\n注意回答问题时须严格根据我给你的系统上下文内容原文进行回答请不要自己发挥,回答时保持原来文本的段落层级" : "")).role(Message.Role.USER).build();
knMessages.add(userMessage);
messages.addAll(knMessages);
}
// 用户对话内容
String chatString = null;
// 获取用户对话信息

View File

@@ -102,8 +102,6 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
lqw.eq(bo.getOverlapChar() != null, KnowledgeInfo::getOverlapChar, bo.getOverlapChar());
lqw.eq(bo.getRetrieveLimit() != null, KnowledgeInfo::getRetrieveLimit, bo.getRetrieveLimit());
lqw.eq(bo.getTextBlockSize() != null, KnowledgeInfo::getTextBlockSize, bo.getTextBlockSize());
lqw.eq(StringUtils.isNotBlank(bo.getVector()), KnowledgeInfo::getVector, bo.getVector());
lqw.eq(StringUtils.isNotBlank(bo.getVectorModel()), KnowledgeInfo::getVectorModel, bo.getVectorModel());
return lqw;
}
@@ -161,7 +159,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
}
baseMapper.insert(knowledgeInfo);
if (knowledgeInfo != null) {
vectorStoreService.createSchema(String.valueOf(knowledgeInfo.getId()),bo.getVector());
vectorStoreService.createSchema(String.valueOf(knowledgeInfo.getId()),bo.getVectorModelName());
}
}else {
baseMapper.updateById(knowledgeInfo);
@@ -177,7 +175,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
check(knowledgeInfoList);
// 删除向量库信息
knowledgeInfoList.forEach(knowledgeInfoVo -> {
vectorStoreService.removeByKid(String.valueOf(knowledgeInfoVo.getId()));
vectorStoreService.removeByKid(String.valueOf(knowledgeInfoVo.getId()),knowledgeInfoVo.getVectorModelName());
});
// 删除附件和知识片段
fragmentMapper.deleteByMap(map);
@@ -231,17 +229,18 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
// 通过kid查询知识库信息
KnowledgeInfoVo knowledgeInfoVo = baseMapper.selectVoOne(Wrappers.<KnowledgeInfo>lambdaQuery()
.eq(KnowledgeInfo::getKid, kid));
.eq(KnowledgeInfo::getId, kid));
// 通过向量模型查询模型信息
ChatModelVo chatModelVo = chatModelService.selectModelByName(knowledgeInfoVo.getVectorModel());
ChatModelVo chatModelVo = chatModelService.selectModelByName(knowledgeInfoVo.getEmbeddingModelName());
StoreEmbeddingBo storeEmbeddingBo = new StoreEmbeddingBo();
storeEmbeddingBo.setKid(kid);
storeEmbeddingBo.setDocId(docId);
storeEmbeddingBo.setFids(fids);
storeEmbeddingBo.setChunkList(chunkList);
storeEmbeddingBo.setModelName(knowledgeInfoVo.getVectorModel());
storeEmbeddingBo.setVectorModelName(knowledgeInfoVo.getVectorModelName());
storeEmbeddingBo.setEmbeddingModelName(knowledgeInfoVo.getEmbeddingModelName());
storeEmbeddingBo.setApiKey(chatModelVo.getApiKey());
storeEmbeddingBo.setBaseUrl(chatModelVo.getApiHost());
vectorStoreService.storeEmbeddings(storeEmbeddingBo);

View File

@@ -0,0 +1,6 @@
ALTER TABLE `knowledge_info`
ADD COLUMN `system_prompt` varchar(255) NULL COMMENT '系统提示词' AFTER `vector_model`;
ALTER TABLE `knowledge_info`
CHANGE COLUMN `vector` `vector_model_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '向量库' AFTER `text_block_size`,
CHANGE COLUMN `vector_model` `embedding_model_name` varchar(50) CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL DEFAULT NULL COMMENT '向量模型' AFTER `vector_model_name`;