5 Commits

Author SHA1 Message Date
zhouweiyi
cbe882af66 chore: 更新百炼模型配置的API密钥
将开发环境和本地环境的百炼模型配置中的API密钥统一更新为占位符'sk-xxxx'
2025-06-04 18:02:26 +08:00
zhouweiyi
1d51a103d0 feat: 集成阿里百炼API实现图片内容识别功能
添加DashscopeService接口及实现,用于调用阿里百炼API进行图片内容识别
修改PdfImageExtractService增加基于百炼API的图片处理逻辑
新增OSS服务方法支持临时文件处理和删除
更新配置文件添加百炼模型相关配置
2025-06-04 17:55:47 +08:00
ageerle
53e3180658 Merge pull request #93 from janzhou123/pdf-image
feat:增加knowledge_attach_pic表结构sql
2025-05-20 09:29:40 +08:00
zhouweiyi
e43e14454d feat:增加knowledge_attach_pic表结构sql 2025-05-20 09:22:41 +08:00
ageerle
a4e995d46c feat: 新增pdf图片解析分支 2025-05-19 15:33:29 +08:00
16 changed files with 526 additions and 90 deletions

View File

@@ -101,4 +101,7 @@ pdf:
ai-api: ai-api:
url: https://api.pandarobot.chat/v1/chat/completions url: https://api.pandarobot.chat/v1/chat/completions
key: sk-xxxx key: sk-xxxx
#百炼模型配置
dashscope:
key: sk-xxxx
model: qvq-max

View File

@@ -0,0 +1,111 @@
--- # Monitoring center configuration
spring.boot.admin.client:
# Switch that turns the admin client on or off
enabled: false
url: http://localhost:9090/admin
instance:
service-host-type: IP
username: ruoyi
password: 123456
--- # Data source configuration
spring:
datasource:
type: com.zaxxer.hikari.HikariDataSource
# Dynamic datasource documentation: https://www.kancloud.cn/tracy5546/dynamic-datasource/content
dynamic:
# Performance analysis plugin (adds overhead; not recommended for production)
p6spy: true
# Default data source or data source group; the default value is master
primary: master
# Strict mode: raise an error when no matching data source is found
strict: true
datasource:
# Primary (master) data source
master:
type: ${spring.datasource.type}
driverClassName: com.mysql.cj.jdbc.Driver
url: jdbc:mysql://localhost:3306/ruoyi-ai?useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull&useSSL=true&serverTimezone=GMT%2B8&autoReconnect=true&rewriteBatchedStatements=true
username: root
password: root
hikari:
# Maximum connection pool size
maxPoolSize: 20
# Minimum number of idle connections
minIdle: 10
# Maximum time to wait when obtaining a connection
connectionTimeout: 30000
# Validation timeout
validationTimeout: 5000
# Maximum time an idle connection is kept alive (default 10 minutes)
idleTimeout: 600000
# Maximum lifetime of a connection in the pool; 0 means unlimited (default 30 minutes)
maxLifetime: 1800000
# Test query used to check whether a connection is still valid
connectionTestQuery: SELECT 1
# How often connection liveness is checked
keepaliveTime: 30000
--- # Redis standalone configuration (enable only one of standalone/cluster; comment out the other)
spring.data:
redis:
# Host address
host: 127.0.0.1
# Port (default 6379)
port: 6379
# Database index
database: 0
# Password (comment this out if no password is set)
password: root
# Connection timeout
timeout: 10S
# Whether to enable SSL
ssl: false
redisson:
# Redis key prefix
keyPrefix:
# Thread pool size
threads: 4
# Netty thread pool size
nettyThreads: 8
# Single-node configuration
singleServerConfig:
# Client name
clientName: ${ruoyi.name}
# Minimum number of idle connections
connectionMinimumIdleSize: 8
# Connection pool size
connectionPoolSize: 32
# Idle connection timeout, in milliseconds
idleConnectionTimeout: 10000
# Command wait timeout, in milliseconds
timeout: 3000
# Publish/subscribe connection pool size
subscriptionConnectionPoolSize: 50
--- # SMS configuration
sms:
enabled: false
# Alibaba Cloud: dysmsapi.aliyuncs.com
# Tencent Cloud: sms.tencentcloudapi.com
endpoint: "dysmsapi.aliyuncs.com"
accessKeyId: xxxxxxx
accessKeySecret: xxxxxx
signName: 测试
# Tencent only
sdkAppId:
pdf:
extract:
service:
url: http://localhost:8080
ai-api:
url: https://api.pandarobot.chat/v1/chat/completions
key: sk-xxxx
# Bailian (DashScope) model configuration; the key below is a placeholder
dashscope:
key: sk-xxxx
model: qvq-max

View File

@@ -180,3 +180,7 @@ pdf:
ai-api: ai-api:
url: https://api.pandarobot.chat/v1/chat/completions url: https://api.pandarobot.chat/v1/chat/completions
key: sk-XXXXXX key: sk-XXXXXX
#百炼模型配置
dashscope:
key: sk-XXXX
model: qvq-max

View File

@@ -119,6 +119,12 @@
<artifactId>ruoyi-system-api</artifactId> <artifactId>ruoyi-system-api</artifactId>
</dependency> </dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>dashscope-sdk-java</artifactId>
<version>2.19.0</version>
</dependency>
</dependencies> </dependencies>
</project> </project>

View File

@@ -0,0 +1,23 @@
package org.ruoyi.service;
import java.io.IOException;
/**
* Client contract for the Alibaba Bailian (DashScope) API.
*
* @Date: 2025/6/4 2:24 PM
*/
public interface DashscopeService {
/**
* Runs QVQ visual reasoning on an image referenced by URL.
*
* @param imageUrl an accessible address of the image
* @return the text the model produced for the image
* @throws IOException if the call fails
*/
String qvq(String imageUrl) throws IOException;
/**
* Runs QVQ visual reasoning on an image stored in a local file.
*
* @param localPath absolute path of the image file
* @return the text the model produced for the image
* @throws IOException if the call fails
*/
String qvq4LocalPath(String localPath) throws IOException;
}

View File

@@ -30,7 +30,23 @@ public interface PdfImageExtractService {
* @throws IOException 如果API调用过程中发生错误 * @throws IOException 如果API调用过程中发生错误
*/ */
List<PdfFileContentResult> dealFileContent(String[] unzip) throws IOException; List<PdfFileContentResult> dealFileContent(String[] unzip) throws IOException;
/**
* Processes image content through the Bailian API.
*
* @param imageUrl address of the image to analyse
* @return list of file content results
* @throws IOException if an error occurs while calling the API
*/
List<PdfFileContentResult> dealFileContent4Dashscope(String imageUrl) throws IOException;
/**
* Processes image content through the Bailian API using QVQ visual reasoning on a local file.
*
* @param localPath absolute path of the image file
* @return list of file content results
* @throws IOException if an error occurs while calling the API
*/
List<PdfFileContentResult> dealFileContent4DashscopeBase64(String localPath)throws IOException;
/** /**
* 提取PDF中的图片并调用gpt-4o-mini,识别图片内容并返回 * 提取PDF中的图片并调用gpt-4o-mini,识别图片内容并返回
* @param file * @param file

View File

@@ -0,0 +1,149 @@
package org.ruoyi.service.impl;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversation;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationParam;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationResult;
import com.alibaba.dashscope.common.MultiModalMessage;
import com.alibaba.dashscope.common.Role;
import io.reactivex.Flowable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import lombok.extern.slf4j.Slf4j;
import org.ruoyi.domain.PdfFileContentResult;
import org.ruoyi.service.DashscopeService;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
/**
 * Alibaba Bailian (DashScope) client used to recognise image content with the configured
 * visual-reasoning model.
 *
 * <p>Streamed output is accumulated in per-call buffers rather than in static fields, so
 * concurrent invocations on the shared bean cannot reset or interleave each other's answers.
 *
 * @Date: 2025/6/4 2:28 PM
 */
@Service
@Slf4j
public class DashscopeServiceImpl implements DashscopeService {

    /** Prompt sent with every image; it is part of the request and must stay verbatim. */
    private static final String IMAGE_PROMPT = "这张图片有什么";

    @Value("${dashscope.model}")
    private String serviceModel;

    @Value("${dashscope.key}")
    private String serviceKey;

    /**
     * Runs QVQ visual reasoning on an image referenced by URL.
     *
     * @param imageUrl an accessible address of the image
     * @return the streamed answer with line feeds, carriage returns and spaces removed
     * @throws IOException if building the request or calling the API fails
     */
    @Override
    public String qvq(String imageUrl) throws IOException {
        try {
            // Build the multimodal message: prompt first, then the image URL.
            MultiModalMessage userMessage = MultiModalMessage.builder()
                .role(Role.USER.getValue())
                .content(Arrays.asList(
                    Collections.singletonMap("text", IMAGE_PROMPT),
                    Collections.singletonMap("image", imageUrl)))
                .build();
            return streamAnswer(userMessage);
        } catch (Exception e) {
            log.error("调用百炼API失败: {}", e.getMessage(), e);
            throw new IOException("百炼API调用失败: " + e.getMessage(), e);
        }
    }

    /**
     * Runs QVQ visual reasoning on an image stored in a local file.
     *
     * @param localPath absolute path of the image file
     * @return the streamed answer with line feeds, carriage returns and spaces removed
     * @throws IOException if building the request or calling the API fails
     */
    @Override
    public String qvq4LocalPath(String localPath) throws IOException {
        try {
            String filePath = "file://" + localPath;
            log.info("filePath: {}", filePath);
            // Plain mutable maps are kept on purpose (instead of immutable singleton maps):
            // NOTE(review): the SDK presumably rewrites the local "image" entry after upload - confirm.
            Map<String, Object> imagePart = new HashMap<>();
            imagePart.put("image", filePath);
            Map<String, Object> textPart = new HashMap<>();
            textPart.put("text", IMAGE_PROMPT);
            MultiModalMessage userMessage = MultiModalMessage.builder()
                .role(Role.USER.getValue())
                .content(Arrays.asList(imagePart, textPart))
                .build();
            return streamAnswer(userMessage);
        } catch (Exception e) {
            log.error("调用百炼API失败: {}", e.getMessage(), e);
            throw new IOException("百炼API调用失败: " + e.getMessage(), e);
        }
    }

    /**
     * Sends the message with the configured key and model, consumes the streamed response and
     * returns the accumulated answer text.
     */
    private String streamAnswer(MultiModalMessage userMessage) throws Exception {
        MultiModalConversationParam param = MultiModalConversationParam.builder()
            .apiKey(serviceKey) // API key from the configuration file
            .model(serviceModel)
            .message(userMessage)
            .build();
        MultiModalConversation conv = new MultiModalConversation();
        Flowable<MultiModalConversationResult> result = conv.streamCall(param);

        // Per-call buffers: nothing is shared between concurrent requests.
        StringBuilder reasoning = new StringBuilder();
        StringBuilder answer = new StringBuilder();
        result.blockingForEach(chunk -> appendChunk(chunk, reasoning, answer));

        if (reasoning.length() > 0) {
            log.debug("Dashscope reasoning content: {}", reasoning);
        }
        log.debug("Dashscope final content: {}", answer);
        // "\s" is the Java string escape for a single space, so this strips LF, CR and spaces.
        return answer.toString().replaceAll("[\n\r\s]", "");
    }

    /**
     * Appends the reasoning and answer text of one streamed chunk to the given buffers,
     * ignoring chunks that carry no choice, no message or no text.
     */
    private static void appendChunk(MultiModalConversationResult chunk, StringBuilder reasoning,
        StringBuilder answer) {
        if (Objects.isNull(chunk) || Objects.isNull(chunk.getOutput())) {
            return;
        }
        var choices = chunk.getOutput().getChoices();
        if (Objects.isNull(choices) || choices.isEmpty()) {
            return;
        }
        var message = choices.get(0).getMessage();
        if (Objects.isNull(message)) {
            return;
        }
        String reasoningPart = message.getReasoningContent();
        if (Objects.nonNull(reasoningPart)) {
            reasoning.append(reasoningPart);
        }
        List<Map<String, Object>> content = message.getContent();
        if (Objects.nonNull(content) && !content.isEmpty() && Objects.nonNull(content.get(0))) {
            Object text = content.get(0).get("text");
            // Skip missing text so the literal "null" never ends up in the answer.
            if (Objects.nonNull(text)) {
                answer.append(text);
            }
        }
    }
}

View File

@@ -4,7 +4,6 @@ import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import lombok.AllArgsConstructor;
import lombok.Data; import lombok.Data;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import okhttp3.MediaType; import okhttp3.MediaType;
@@ -14,10 +13,11 @@ import okhttp3.OkHttpClient.Builder;
import okhttp3.Request; import okhttp3.Request;
import okhttp3.RequestBody; import okhttp3.RequestBody;
import okhttp3.Response; import okhttp3.Response;
import org.ruoyi.common.core.domain.R;
import org.ruoyi.domain.PdfFileContentResult; import org.ruoyi.domain.PdfFileContentResult;
import org.ruoyi.service.DashscopeService;
import org.ruoyi.service.PdfImageExtractService; import org.ruoyi.service.PdfImageExtractService;
import org.ruoyi.utils.ZipUtils; import org.ruoyi.utils.ZipUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value; import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile; import org.springframework.web.multipart.MultipartFile;
@@ -25,20 +25,21 @@ import org.springframework.web.multipart.MultipartFile;
/** /**
* PDF图片提取服务实现类 * PDF图片提取服务实现类
*/ */
//@Service @Service
@Slf4j @Slf4j
@Data @Data
@AllArgsConstructor public class PdfImageExtractServiceImpl implements PdfImageExtractService {
//public class PdfImageExtractServiceImpl implements PdfImageExtractService {
public class PdfImageExtractServiceImpl {
// @Value("${pdf.extract.service.url}") @Value("${pdf.extract.service.url}")
private String serviceUrl; private String serviceUrl;
// @Value("${pdf.extract.ai-api.url}") @Value("${pdf.extract.ai-api.url}")
private String aiApiUrl; private String aiApiUrl;
// @Value("${pdf.extract.ai-api.key}") @Value("${pdf.extract.ai-api.key}")
private String aiApiKey; private String aiApiKey;
@Autowired
private DashscopeService dashscopeService;
private final OkHttpClient client = new Builder() private final OkHttpClient client = new Builder()
.connectTimeout(100, TimeUnit.SECONDS) .connectTimeout(100, TimeUnit.SECONDS)
.readTimeout(150, TimeUnit.SECONDS) .readTimeout(150, TimeUnit.SECONDS)
@@ -135,6 +136,42 @@ public class PdfImageExtractServiceImpl {
return results; return results;
} }
/**
 * Recognises the content of an image through the Bailian API.
 *
 * @param imageUrl address of the image to analyse
 * @return a list holding one result whose name is {@code image_<current millis>}
 * @throws IOException if an error occurs while calling the API
 */
@Override
public List<PdfFileContentResult> dealFileContent4Dashscope(String imageUrl) throws IOException {
    final String recognizedText = dashscopeService.qvq(imageUrl);
    // Wrap the single answer in a result list.
    final PdfFileContentResult entry =
        new PdfFileContentResult("image_" + System.currentTimeMillis(), recognizedText);
    List<PdfFileContentResult> output = new ArrayList<>();
    output.add(entry);
    return output;
}
/**
 * Recognises the content of a local image file through the Bailian API (QVQ visual reasoning).
 *
 * @param localPath absolute path of the image file
 * @return a list holding one result whose name is {@code image_<current millis>}
 * @throws IOException if an error occurs while calling the API
 */
@Override
public List<PdfFileContentResult> dealFileContent4DashscopeBase64(String localPath) throws IOException {
    final String recognizedText = dashscopeService.qvq4LocalPath(localPath);
    // Wrap the single answer in a result list.
    final PdfFileContentResult entry =
        new PdfFileContentResult("image_" + System.currentTimeMillis(), recognizedText);
    List<PdfFileContentResult> output = new ArrayList<>();
    output.add(entry);
    return output;
}
// @Override // @Override
public List<PdfFileContentResult> extractImages(MultipartFile file) throws IOException { public List<PdfFileContentResult> extractImages(MultipartFile file) throws IOException {
String format = "png"; String format = "png";

View File

@@ -1,13 +1,11 @@
package org.ruoyi.service.impl; package org.ruoyi.service.impl;
import cn.hutool.core.util.RandomUtil;
import com.google.protobuf.ServiceException; import com.google.protobuf.ServiceException;
import dev.langchain4j.data.embedding.Embedding; import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment; import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.embedding.EmbeddingModel; import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.model.ollama.OllamaEmbeddingModel; import dev.langchain4j.model.ollama.OllamaEmbeddingModel;
import dev.langchain4j.model.openai.OpenAiEmbeddingModel; import dev.langchain4j.model.openai.OpenAiEmbeddingModel;
import dev.langchain4j.model.output.Response;
import dev.langchain4j.store.embedding.EmbeddingMatch; import dev.langchain4j.store.embedding.EmbeddingMatch;
import dev.langchain4j.store.embedding.EmbeddingSearchRequest; import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
import dev.langchain4j.store.embedding.EmbeddingStore; import dev.langchain4j.store.embedding.EmbeddingStore;
@@ -31,6 +29,7 @@ import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
import java.util.stream.Collectors;
/** /**
* 向量库管理 * 向量库管理

View File

@@ -32,6 +32,15 @@ public interface ISysOssService {
String downloadByByte(Long ossId) throws IOException; String downloadByByte(Long ossId) throws IOException;
String downloadToTempPath(Long ossId) throws IOException;
Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid); Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid);
/**
* 根据文件路径删除文件
*
* @param filePath 文件路径
* @return 是否删除成功
*/
boolean deleteFile(String filePath);
} }

View File

@@ -213,4 +213,48 @@ public class SysOssServiceImpl implements ISysOssService, OssService {
} }
return oss; return oss;
} }
/**
 * Downloads the stored object identified by {@code ossId} into a new temporary file.
 *
 * <p>If the download fails, the partially written temporary file is removed before the
 * exception is raised, because the caller never receives its path and could not clean it up.
 *
 * @param ossId primary key of the OSS record
 * @return absolute path of the temporary file holding the object content
 * @throws IOException declared by the interface
 * @throws ServiceException if the record does not exist or the download fails
 */
@Override
public String downloadToTempPath(Long ossId) throws IOException {
    SysOssVo sysOss = SpringUtils.getAopProxy(this).getById(ossId);
    if (ObjectUtil.isNull(sysOss)) {
        throw new ServiceException("文件数据不存在!");
    }
    OssClient storage = OssFactory.instance();
    java.io.File tempFile = null;
    try (InputStream inputStream = storage.getObjectContent(sysOss.getUrl())) {
        // Create the temporary file, reusing the stored suffix when there is one.
        String suffix = StringUtils.isNotEmpty(sysOss.getFileSuffix()) ? sysOss.getFileSuffix() : "";
        tempFile = java.io.File.createTempFile("download_", suffix);
        // Safety net: remove the file at JVM exit if nobody deleted it earlier.
        tempFile.deleteOnExit();
        // Copy the object content into the temporary file.
        cn.hutool.core.io.FileUtil.writeFromStream(inputStream, tempFile);
        return tempFile.getAbsolutePath();
    } catch (Exception e) {
        // Best-effort cleanup so a failed download does not leave a stray file behind.
        if (tempFile != null && tempFile.exists()) {
            tempFile.delete();
        }
        throw new ServiceException(e.getMessage());
    }
}
/**
 * Deletes the regular file at the given path.
 *
 * @param filePath path of the file to delete
 * @return {@code true} only if the path is non-empty, points to an existing regular file and
 *         the deletion succeeded
 */
@Override
public boolean deleteFile(String filePath) {
    if (!StringUtils.isEmpty(filePath)) {
        try {
            java.io.File target = new java.io.File(filePath);
            boolean deletable = target.exists() && target.isFile();
            return deletable && target.delete();
        } catch (Exception e) {
            throw new ServiceException("删除文件失败: " + e.getMessage());
        }
    }
    return false;
}
} }

View File

@@ -52,7 +52,7 @@ public class KnowledgeController extends BaseController {
private final IKnowledgeFragmentService fragmentService; private final IKnowledgeFragmentService fragmentService;
// private final PdfImageExtractService pdfImageExtractService; private final PdfImageExtractService pdfImageExtractService;
/** /**
* 根据用户信息查询本地知识库 * 根据用户信息查询本地知识库
@@ -170,11 +170,11 @@ public class KnowledgeController extends BaseController {
* @param file PDF文件 * @param file PDF文件
* @return 文件名称和图片内容 * @return 文件名称和图片内容
*/ */
// @PostMapping("/extract-images") @PostMapping("/extract-images")
// @Operation(summary = "提取PDF中的图片并调用大模型,识别图片内容并返回", description = "提取PDF中的图片并调用gpt-4o-mini,识别图片内容并返回") @Operation(summary = "提取PDF中的图片并调用大模型,识别图片内容并返回", description = "提取PDF中的图片并调用gpt-4o-mini,识别图片内容并返回")
// public R<List<PdfFileContentResult>> extractImages( public R<List<PdfFileContentResult>> extractImages(
// @RequestPart("file") MultipartFile file ) throws IOException {
// ) throws IOException { return R.ok(pdfImageExtractService
// return R.ok(pdfImageExtractService.extractImages(file)); .dealFileContent4Dashscope("https://hnzuoran02-1327573163.cos.ap-nanjing.myqcloud.com/crmebimage/public/content/2025/06/04/e115264eb22f423ea0b211709361c29f071avy39ez.jpg"));
// } }
} }

View File

@@ -26,6 +26,7 @@ import org.ruoyi.mapper.KnowledgeAttachPicMapper;
import org.ruoyi.mapper.KnowledgeFragmentMapper; import org.ruoyi.mapper.KnowledgeFragmentMapper;
import org.ruoyi.mapper.KnowledgeInfoMapper; import org.ruoyi.mapper.KnowledgeInfoMapper;
import org.ruoyi.service.IChatModelService; import org.ruoyi.service.IChatModelService;
import org.ruoyi.service.PdfImageExtractService;
import org.ruoyi.service.VectorStoreService; import org.ruoyi.service.VectorStoreService;
import org.ruoyi.service.impl.PdfImageExtractServiceImpl; import org.ruoyi.service.impl.PdfImageExtractServiceImpl;
import org.ruoyi.system.domain.vo.SysOssVo; import org.ruoyi.system.domain.vo.SysOssVo;
@@ -64,18 +65,10 @@ public class DealFileService {
private final ISysOssService ossService; private final ISysOssService ossService;
// private final PdfImageExtractService pdfImageExtractService; private final PdfImageExtractService pdfImageExtractService;
private final KnowledgeAttachPicMapper picMapper; private final KnowledgeAttachPicMapper picMapper;
@Value("${pdf.extract.service.url}")
private String serviceUrl;
@Value("${pdf.extract.ai-api.url}")
private String aiApiUrl;
@Value("${pdf.extract.ai-api.key}")
private String aiApiKey;
@Async @Async
public void dealVectorStatus(KnowledgeAttach attachItem) throws Exception { public void dealVectorStatus(KnowledgeAttach attachItem) throws Exception {
try { try {
@@ -169,8 +162,6 @@ public class DealFileService {
//获取oss文件 //获取oss文件
MultipartFile multipartFile = ossService.downloadByFile(attachItem.getOssId()); MultipartFile multipartFile = ossService.downloadByFile(attachItem.getOssId());
//拆解出图片ZIP //拆解出图片ZIP
PdfImageExtractServiceImpl pdfImageExtractService = new PdfImageExtractServiceImpl(serviceUrl,
aiApiUrl, aiApiKey);
byte[] pngs = pdfImageExtractService.extractImages(multipartFile, "png", true); byte[] pngs = pdfImageExtractService.extractImages(multipartFile, "png", true);
//解压zip得到图片文件 //解压zip得到图片文件
MultipartFile[] multipartFiles = ZipUtils.unzipToMultipartFiles(pngs); MultipartFile[] multipartFiles = ZipUtils.unzipToMultipartFiles(pngs);
@@ -236,6 +227,7 @@ public class DealFileService {
@Async @Async
public void dealPicAnysStatus(KnowledgeAttachPic picItem) throws Exception { public void dealPicAnysStatus(KnowledgeAttachPic picItem) throws Exception {
String filePath = null;
try { try {
//锁定数据 更改 getPicAnysStatus 到进行中 //锁定数据 更改 getPicAnysStatus 到进行中
if (picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>() if (picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>()
@@ -247,13 +239,10 @@ public class DealFileService {
} }
SysOssVo ossVo = ossService.getById(picItem.getOssId()); SysOssVo ossVo = ossService.getById(picItem.getOssId());
if (ObjectUtil.isNotEmpty(ossVo)) { if (ObjectUtil.isNotEmpty(ossVo)) {
String fileStr = ossService.downloadByByte(picItem.getOssId()); filePath = ossService.downloadToTempPath(picItem.getOssId());
//调用第三方 分析图片内容 //调用第三方 分析图片内容
PdfImageExtractServiceImpl pdfImageExtractService = new PdfImageExtractServiceImpl( List<PdfFileContentResult> pdfFileContentResults = pdfImageExtractService.dealFileContent4DashscopeBase64(
serviceUrl, filePath);
aiApiUrl, aiApiKey);
List<PdfFileContentResult> pdfFileContentResults = pdfImageExtractService.dealFileContent(
new String[]{fileStr});
if (ObjectUtil.isNotEmpty(pdfFileContentResults)) { if (ObjectUtil.isNotEmpty(pdfFileContentResults)) {
for (PdfFileContentResult resultItem : pdfFileContentResults) { for (PdfFileContentResult resultItem : pdfFileContentResults) {
//图片解析内容回写到pic表 //图片解析内容回写到pic表
@@ -302,6 +291,11 @@ public class DealFileService {
.eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_20) .eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_20)
.eq(KnowledgeAttachPic::getId, picItem.getId())); .eq(KnowledgeAttachPic::getId, picItem.getId()));
throw new RuntimeException(e); throw new RuntimeException(e);
} finally {
//无论成功还是失败,都要删除临时文件
if (ObjectUtil.isNotEmpty(filePath)) {
ossService.deleteFile(filePath);
}
} }
} }
@@ -349,35 +343,32 @@ public class DealFileService {
return null; return null;
} }
public static String parseContent(String jsonString) { public static String parseContent(String content) {
try { try {
// 创建ObjectMapper实例 // 首先尝试作为JSON解析
ObjectMapper objectMapper = new ObjectMapper(); ObjectMapper objectMapper = new ObjectMapper();
JsonNode rootNode = objectMapper.readTree(content);
// 解析JSON字符串 // 如果是JSON格式,按原有逻辑处理
JsonNode rootNode = objectMapper.readTree(jsonString);
// 获取choices数组的第一个元素
JsonNode choicesNode = rootNode.get("choices"); JsonNode choicesNode = rootNode.get("choices");
if (choicesNode != null && choicesNode.isArray() && choicesNode.size() > 0) { if (choicesNode != null && choicesNode.isArray() && choicesNode.size() > 0) {
// 获取第一个choice
JsonNode firstChoice = choicesNode.get(0); JsonNode firstChoice = choicesNode.get(0);
// 获取message节点
JsonNode messageNode = firstChoice.get("message"); JsonNode messageNode = firstChoice.get("message");
if (messageNode != null) { if (messageNode != null) {
// 获取content字段的值
JsonNode contentNode = messageNode.get("content"); JsonNode contentNode = messageNode.get("content");
if (contentNode != null) { if (contentNode != null) {
return contentNode.asText(); return contentNode.asText();
} }
} }
}
return "无法找到content内容"; return "无法找到content内容";
}
// 如果不是预期的JSON格式直接返回原始内容
return content;
} catch (Exception e) { } catch (Exception e) {
e.printStackTrace(); // 如果解析JSON失败说明是普通文本直接返回
return "解析JSON时发生错误: " + e.getMessage(); return content;
} }
} }

View File

@@ -57,7 +57,6 @@ import org.ruoyi.system.service.ISysOssService;
import java.io.IOException; import java.io.IOException;
import java.util.*; import java.util.*;
/** /**
* 知识库Service业务层处理 * 知识库Service业务层处理
* *
@@ -234,8 +233,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
.collect(Collectors.toList())) .collect(Collectors.toList()))
.in(KnowledgeAttachPic::getAid, .in(KnowledgeAttachPic::getAid,
knowledgeAttachVos.stream().map(KnowledgeAttachVo::getId) knowledgeAttachVos.stream().map(KnowledgeAttachVo::getId)
.collect(Collectors.toList())) .collect(Collectors.toList())));
);
if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) { if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) {
Collection<Long> tossIds = knowledgeAttachPics.stream() Collection<Long> tossIds = knowledgeAttachPics.stream()
.map(KnowledgeAttachPic::getOssId) .map(KnowledgeAttachPic::getOssId)
@@ -334,15 +332,14 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
/** /**
* 第一步 定时 拆解PDF文件中的图片 * 第一步 定时 拆解PDF文件中的图片
*/ */
//@Scheduled(fixedDelay = 15000) // 每3秒执行一次 @Scheduled(fixedDelay = 15000) // 每3秒执行一次
public void dealKnowledgeAttachPic() throws Exception { public void dealKnowledgeAttachPic() throws Exception {
// 处理 拆解PDF文件中的图片的记录 // 处理 拆解PDF文件中的图片的记录
List<KnowledgeAttach> knowledgeAttaches = attachMapper.selectList( List<KnowledgeAttach> knowledgeAttaches = attachMapper.selectList(
new LambdaQueryWrapper<KnowledgeAttach>() new LambdaQueryWrapper<KnowledgeAttach>()
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_10) .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10) .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10) .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10));
);
log.info("===============拆解PDF文件中的图片 size = {}", knowledgeAttaches.size()); log.info("===============拆解PDF文件中的图片 size = {}", knowledgeAttaches.size());
if (ObjectUtil.isNotEmpty(knowledgeAttaches)) { if (ObjectUtil.isNotEmpty(knowledgeAttaches)) {
for (KnowledgeAttach attachItem : knowledgeAttaches) { for (KnowledgeAttach attachItem : knowledgeAttaches) {
@@ -354,32 +351,31 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
/** /**
* 第二步 定时 解析图片内容 * 第二步 定时 解析图片内容
*/ */
//@Scheduled(fixedDelay = 15000) @Scheduled(fixedDelay = 15000)
public void dealKnowledgeAttachPicAnys() throws Exception { public void dealKnowledgeAttachPicAnys() throws Exception {
// 获取未处理的图片记录 // 获取未处理的图片记录
List<KnowledgeAttachPic> knowledgeAttachPics = picMapper.selectList( List<KnowledgeAttachPic> knowledgeAttachPics = picMapper.selectList(
new LambdaQueryWrapper<KnowledgeAttachPic>() new LambdaQueryWrapper<KnowledgeAttachPic>()
.eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10) .eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10)
.last("LIMIT 20") .last("LIMIT 20"));
);
if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) { if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) {
for (KnowledgeAttachPic picItem : knowledgeAttachPics) { for (KnowledgeAttachPic picItem : knowledgeAttachPics) {
dealFileService.dealPicAnysStatus(picItem); dealFileService.dealPicAnysStatus(picItem);
} }
} }
} }
/** /**
* 第三步 定时 处理 附件上传后上传向量数据库 * 第三步 定时 处理 附件上传后上传向量数据库
*/ */
//@Scheduled(fixedDelay = 30000) // 每3秒执行一次 @Scheduled(fixedDelay = 30000) // 每3秒执行一次
public void dealKnowledgeAttachVector() throws Exception { public void dealKnowledgeAttachVector() throws Exception {
// 处理 需要上传向量数据库的记录 // 处理 需要上传向量数据库的记录
List<KnowledgeAttach> knowledgeAttaches = attachMapper.selectList( List<KnowledgeAttach> knowledgeAttaches = attachMapper.selectList(
new LambdaQueryWrapper<KnowledgeAttach>() new LambdaQueryWrapper<KnowledgeAttach>()
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30) .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30) .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10) .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10));
);
log.info("===============上传向量数据库 size = {}", knowledgeAttaches.size()); log.info("===============上传向量数据库 size = {}", knowledgeAttaches.size());
if (ObjectUtil.isNotEmpty(knowledgeAttaches)) { if (ObjectUtil.isNotEmpty(knowledgeAttaches)) {
for (KnowledgeAttach attachItem : knowledgeAttaches) { for (KnowledgeAttach attachItem : knowledgeAttaches) {
@@ -387,10 +383,11 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
} }
} }
} }
/** /**
* 第四步 定时 处理 失败数据 * 第四步 定时 处理 失败数据
*/ */
//@Scheduled(fixedDelay = 30 * 60 * 1000) @Scheduled(fixedDelay = 30 * 60 * 1000)
public void dealKnowledge40Status() throws Exception { public void dealKnowledge40Status() throws Exception {
// 拆解PDF失败 重新设置状态 // 拆解PDF失败 重新设置状态
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>() attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
@@ -405,5 +402,5 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
.set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10) .set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_40)); .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_40));
} }
@Scheduled(fixedDelay = 180000) // 3分钟执行一次
} }

View File

@@ -8,3 +8,48 @@ ADD PRIMARY KEY (`id`) USING BTREE;
ALTER TABLE `knowledge_attach` ALTER TABLE `knowledge_attach`
MODIFY COLUMN `remark` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL COMMENT '备注' AFTER `update_time`; MODIFY COLUMN `remark` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL COMMENT '备注' AFTER `update_time`;
/*
Navicat Premium Data Transfer
Source Server : localhost-57
Source Server Type : MySQL
Source Server Version : 50731 (5.7.31)
Source Host : localhost:3306
Source Schema : ruoyi-ai
Target Server Type : MySQL
Target Server Version : 50731 (5.7.31)
File Encoding : 65001
Date: 19/05/2025 15:22:09
*/
SET NAMES utf8mb4;
SET FOREIGN_KEY_CHECKS = 0;
-- ----------------------------
-- Table structure for knowledge_attach_pic
-- (list of pictures attached to knowledge-base documents)
-- NOTE: the DROP TABLE below removes any existing knowledge_attach_pic table,
-- including its rows, before the table is recreated.
-- pic_anys_status is the picture-analysis status:
-- 10 = not started (default), 20 = in progress, 30 = finished.
-- ----------------------------
DROP TABLE IF EXISTS `knowledge_attach_pic`;
CREATE TABLE `knowledge_attach_pic` (
`id` bigint(20) NOT NULL AUTO_INCREMENT COMMENT '主键',
`kid` varchar(50) NOT NULL COMMENT '知识库id',
`aid` varchar(50) NOT NULL COMMENT '附件id',
`doc_name` varchar(500) DEFAULT NULL COMMENT '文档名称',
`doc_type` varchar(50) NOT NULL COMMENT '文档类型',
`content` longtext COMMENT '文档内容',
`page_num` int(5) DEFAULT '0' COMMENT '所在页数',
`index_num` int(5) DEFAULT '0' COMMENT '所在页index',
`pic_anys_status` int(5) NOT NULL DEFAULT '10' COMMENT '分析图片状态10未开始20进行中30已完成',
`oss_id` bigint(20) NOT NULL COMMENT '对象存储主键',
`create_dept` varchar(255) DEFAULT NULL COMMENT '部门',
`create_by` varchar(50) DEFAULT NULL COMMENT '创建人',
`create_time` datetime DEFAULT NULL COMMENT '创建时间',
`update_by` bigint(20) DEFAULT NULL COMMENT '更新者',
`update_time` datetime DEFAULT NULL COMMENT '更新时间',
`remark` text COMMENT '备注',
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=1922929659800637443 DEFAULT CHARSET=utf8mb4 COMMENT='知识库附件图片列表';
SET FOREIGN_KEY_CHECKS = 1;

View File

@@ -0,0 +1,2 @@
-- Make knowledge_attach.oss_id nullable (default NULL) and place it after doc_type.
ALTER TABLE `knowledge_attach`
MODIFY COLUMN `oss_id` bigint(20) NULL DEFAULT NULL COMMENT '对象存储ID' AFTER `doc_type`;