mirror of
https://gitcode.com/ageerle/ruoyi-ai.git
synced 2026-04-06 08:17:31 +00:00
feat: 集成阿里百炼API实现图片内容识别功能
添加DashscopeService接口及实现,用于调用阿里百炼API进行图片内容识别 修改PdfImageExtractService增加基于百炼API的图片处理逻辑 新增OSS服务方法支持临时文件处理和删除 更新配置文件添加百炼模型相关配置
This commit is contained in:
111
ruoyi-admin/src/main/resources/application-local.yml
Normal file
111
ruoyi-admin/src/main/resources/application-local.yml
Normal file
@@ -0,0 +1,111 @@
|
||||
--- # 监控中心配置
|
||||
spring.boot.admin.client:
|
||||
# 增加客户端开关
|
||||
enabled: false
|
||||
url: http://localhost:9090/admin
|
||||
instance:
|
||||
service-host-type: IP
|
||||
username: ruoyi
|
||||
password: 123456
|
||||
|
||||
--- # 数据源配置
|
||||
spring:
|
||||
datasource:
|
||||
type: com.zaxxer.hikari.HikariDataSource
|
||||
# 动态数据源文档 https://www.kancloud.cn/tracy5546/dynamic-datasource/content
|
||||
dynamic:
|
||||
# 性能分析插件(有性能损耗 不建议生产环境使用)
|
||||
p6spy: true
|
||||
# 设置默认的数据源或者数据源组,默认值即为 master
|
||||
primary: master
|
||||
# 严格模式 匹配不到数据源则报错
|
||||
strict: true
|
||||
datasource:
|
||||
# 主库数据源
|
||||
master:
|
||||
type: ${spring.datasource.type}
|
||||
driverClassName: com.mysql.cj.jdbc.Driver
|
||||
url: jdbc:mysql://localhost:3306/ruoyi-ai?useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull&useSSL=true&serverTimezone=GMT%2B8&autoReconnect=true&rewriteBatchedStatements=true
|
||||
username: root
|
||||
password: root
|
||||
|
||||
|
||||
hikari:
|
||||
# 最大连接池数量
|
||||
maxPoolSize: 20
|
||||
# 最小空闲线程数量
|
||||
minIdle: 10
|
||||
# 配置获取连接等待超时的时间
|
||||
connectionTimeout: 30000
|
||||
# 校验超时时间
|
||||
validationTimeout: 5000
|
||||
# 空闲连接存活最大时间,默认10分钟
|
||||
idleTimeout: 600000
|
||||
# 此属性控制池中连接的最长生命周期,值0表示无限生命周期,默认30分钟
|
||||
maxLifetime: 1800000
|
||||
# 连接测试query(配置检测连接是否有效)
|
||||
connectionTestQuery: SELECT 1
|
||||
# 多久检查一次连接的活性
|
||||
keepaliveTime: 30000
|
||||
|
||||
--- # redis 单机配置(单机与集群只能开启一个另一个需要注释掉)
|
||||
spring.data:
|
||||
redis:
|
||||
# 地址
|
||||
host: 127.0.0.1
|
||||
# 端口,默认为6379
|
||||
port: 6379
|
||||
# 数据库索引
|
||||
database: 0
|
||||
# 密码(如没有密码请注释掉)
|
||||
password: root
|
||||
# 连接超时时间
|
||||
timeout: 10S
|
||||
# 是否开启ssl
|
||||
ssl: false
|
||||
|
||||
redisson:
|
||||
# redis key前缀
|
||||
keyPrefix:
|
||||
# 线程池数量
|
||||
threads: 4
|
||||
# Netty线程池数量
|
||||
nettyThreads: 8
|
||||
# 单节点配置
|
||||
singleServerConfig:
|
||||
# 客户端名称
|
||||
clientName: ${ruoyi.name}
|
||||
# 最小空闲连接数
|
||||
connectionMinimumIdleSize: 8
|
||||
# 连接池大小
|
||||
connectionPoolSize: 32
|
||||
# 连接空闲超时,单位:毫秒
|
||||
idleConnectionTimeout: 10000
|
||||
# 命令等待超时,单位:毫秒
|
||||
timeout: 3000
|
||||
# 发布和订阅连接池大小
|
||||
subscriptionConnectionPoolSize: 50
|
||||
|
||||
--- # sms 短信
|
||||
sms:
|
||||
enabled: false
|
||||
# 阿里云 dysmsapi.aliyuncs.com
|
||||
# 腾讯云 sms.tencentcloudapi.com
|
||||
endpoint: "dysmsapi.aliyuncs.com"
|
||||
accessKeyId: xxxxxxx
|
||||
accessKeySecret: xxxxxx
|
||||
signName: 测试
|
||||
# 腾讯专用
|
||||
sdkAppId:
|
||||
|
||||
pdf:
|
||||
extract:
|
||||
service:
|
||||
url: http://localhost:8080
|
||||
ai-api:
|
||||
url: https://api.pandarobot.chat/v1/chat/completions
|
||||
key: sk-xxxx
|
||||
# Alibaba Bailian (DashScope) model configuration.
# Do not commit a real API key: supply it through the DASHSCOPE_API_KEY
# environment variable. The placeholder default only keeps local startup working.
dashscope:
  key: ${DASHSCOPE_API_KEY:sk-XXXX}
  model: qvq-max
|
||||
@@ -179,4 +179,8 @@ pdf:
|
||||
url: http://localhost:8080
|
||||
ai-api:
|
||||
url: https://api.pandarobot.chat/v1/chat/completions
|
||||
key: sk-XXXXXX
|
||||
key: sk-XXXXXX
|
||||
#百炼模型配置
|
||||
dashscope:
|
||||
key: sk-XXXX
|
||||
model: qvq-max
|
||||
@@ -119,6 +119,12 @@
|
||||
<artifactId>ruoyi-system-api</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.alibaba</groupId>
|
||||
<artifactId>dashscope-sdk-java</artifactId>
|
||||
<version>2.19.0</version>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
||||
</project>
|
||||
|
||||
@@ -0,0 +1,23 @@
|
||||
package org.ruoyi.service;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
 * Contract for calling the Alibaba Bailian (DashScope) visual-reasoning (QVQ) API.
 *
 * <p>Created 2025/6/4.
 */
public interface DashscopeService {

    /**
     * Runs visual reasoning (QVQ) on an image addressed by URL.
     *
     * @param imageUrl address from which the image can be accessed
     * @return the text returned by the model for the image
     * @throws IOException if the API call fails
     */
    String qvq(String imageUrl) throws IOException;

    /**
     * Runs visual reasoning (QVQ) on an image stored in the local file system.
     *
     * @param localPath absolute path of the image file
     * @return the text returned by the model for the image
     * @throws IOException if the API call fails
     */
    String qvq4LocalPath(String localPath) throws IOException;
}
|
||||
@@ -30,7 +30,23 @@ public interface PdfImageExtractService {
|
||||
* @throws IOException 如果API调用过程中发生错误
|
||||
*/
|
||||
List<PdfFileContentResult> dealFileContent(String[] unzip) throws IOException;
|
||||
/**
|
||||
*利用百炼接口处理文件内容
|
||||
*
|
||||
* @param imageUrl 传入图片地址
|
||||
* @return 文件内容结果列表
|
||||
* @throws IOException 如果API调用过程中发生错误
|
||||
*/
|
||||
List<PdfFileContentResult> dealFileContent4Dashscope(String imageUrl) throws IOException;
|
||||
|
||||
/**
|
||||
* 利用百炼接口处理文件内容
|
||||
*
|
||||
* 视觉推理(QVQ) 使用本地文件(输入Base64编码或本地路径)
|
||||
* @param localPath 图片文件的绝对路径
|
||||
* @return
|
||||
*/
|
||||
List<PdfFileContentResult> dealFileContent4DashscopeBase64(String localPath)throws IOException;
|
||||
/**
|
||||
* 提取PDF中的图片并调用gpt-4o-mini,识别图片内容并返回
|
||||
* @param file
|
||||
|
||||
@@ -0,0 +1,149 @@
|
||||
package org.ruoyi.service.impl;
|
||||
|
||||
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversation;
|
||||
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationParam;
|
||||
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationResult;
|
||||
import com.alibaba.dashscope.common.MultiModalMessage;
|
||||
import com.alibaba.dashscope.common.Role;
|
||||
import io.reactivex.Flowable;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.ruoyi.domain.PdfFileContentResult;
|
||||
import org.ruoyi.service.DashscopeService;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
/**
|
||||
* @Description: 阿里百炼API
|
||||
* @Date: 2025/6/4 下午2:28
|
||||
*/
|
||||
@Service
|
||||
@Slf4j
|
||||
public class DashscopeServiceImpl implements DashscopeService {
|
||||
|
||||
private static StringBuilder reasoningContent = new StringBuilder();
|
||||
private static StringBuilder finalContent = new StringBuilder();
|
||||
private static boolean isFirstPrint = true;
|
||||
|
||||
@Value("${dashscope.model}")
|
||||
private String serviceModel;
|
||||
@Value("${dashscope.key}")
|
||||
private String serviceKey;
|
||||
|
||||
/**
|
||||
* 视觉推理(QVQ)
|
||||
* @param imageUrl 图片可访问地址
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public String qvq(String imageUrl) throws IOException {
|
||||
try {
|
||||
// 构建多模态消息
|
||||
MultiModalMessage userMessage = MultiModalMessage.builder()
|
||||
.role(Role.USER.getValue())
|
||||
.content(Arrays.asList(
|
||||
Collections.singletonMap("text", "这张图片有什么"),
|
||||
Collections.singletonMap("image", imageUrl)
|
||||
))
|
||||
.build();
|
||||
|
||||
// 构建请求参数
|
||||
MultiModalConversationParam param = MultiModalConversationParam.builder()
|
||||
.apiKey(serviceKey) // 使用配置文件中的API Key
|
||||
.model(serviceModel)
|
||||
.message(userMessage)
|
||||
.build();
|
||||
|
||||
MultiModalConversation conv = new MultiModalConversation();
|
||||
|
||||
// 调用API
|
||||
Flowable<MultiModalConversationResult> result = conv.streamCall(
|
||||
param);
|
||||
|
||||
reasoningContent = new StringBuilder();
|
||||
finalContent = new StringBuilder();
|
||||
isFirstPrint = true;
|
||||
|
||||
result.blockingForEach(DashscopeServiceImpl::handleGenerationResult);
|
||||
|
||||
return finalContent.toString().replaceAll("[\n\r\s]", "");
|
||||
} catch (Exception e) {
|
||||
log.error("调用百炼API失败: {}", e.getMessage(), e);
|
||||
throw new IOException("百炼API调用失败: " + e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* 视觉推理(QVQ) 使用本地文件(输入Base64编码或本地路径)
|
||||
* @param localPath 图片文件的绝对路径
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public String qvq4LocalPath(String localPath) throws IOException {
|
||||
try {
|
||||
// 构建多模态消息
|
||||
String filePath = "file://"+ localPath;
|
||||
log.info("filePath: {}", filePath);
|
||||
MultiModalMessage userMessage = MultiModalMessage.builder().role(Role.USER.getValue())
|
||||
.content(Arrays.asList(new HashMap<String, Object>(){{put("image", filePath);}},
|
||||
new HashMap<String, Object>(){{put("text", "这张图片有什么");}})).build();
|
||||
|
||||
// 构建请求参数
|
||||
MultiModalConversationParam param = MultiModalConversationParam.builder()
|
||||
.apiKey(serviceKey) // 使用配置文件中的API Key
|
||||
.model(serviceModel)
|
||||
.message(userMessage)
|
||||
.build();
|
||||
MultiModalConversation conv = new MultiModalConversation();
|
||||
|
||||
// 调用API
|
||||
Flowable<MultiModalConversationResult> result = conv.streamCall(
|
||||
param);
|
||||
|
||||
reasoningContent = new StringBuilder();
|
||||
finalContent = new StringBuilder();
|
||||
isFirstPrint = true;
|
||||
|
||||
result.blockingForEach(DashscopeServiceImpl::handleGenerationResult);
|
||||
|
||||
return finalContent.toString().replaceAll("[\n\r\s]", "");
|
||||
} catch (Exception e) {
|
||||
log.error("调用百炼API失败: {}", e.getMessage(), e);
|
||||
throw new IOException("百炼API调用失败: " + e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private static void handleGenerationResult(MultiModalConversationResult message) {
|
||||
|
||||
String re = message.getOutput().getChoices().get(0).getMessage().getReasoningContent();
|
||||
String reasoning = Objects.isNull(re) ? "" : re; // 默认值
|
||||
|
||||
List<Map<String, Object>> content = message.getOutput().getChoices().get(0).getMessage()
|
||||
.getContent();
|
||||
if (!reasoning.isEmpty()) {
|
||||
reasoningContent.append(reasoning);
|
||||
if (isFirstPrint) {
|
||||
System.out.println("====================思考过程====================");
|
||||
isFirstPrint = false;
|
||||
}
|
||||
System.out.print(reasoning);
|
||||
}
|
||||
|
||||
if (Objects.nonNull(content) && !content.isEmpty()) {
|
||||
Object text = content.get(0).get("text");
|
||||
finalContent.append(text);
|
||||
if (!isFirstPrint) {
|
||||
System.out.println("\n====================完整回复====================");
|
||||
isFirstPrint = true;
|
||||
}
|
||||
System.out.print(text);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -4,7 +4,6 @@ import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import okhttp3.MediaType;
|
||||
@@ -14,10 +13,11 @@ import okhttp3.OkHttpClient.Builder;
|
||||
import okhttp3.Request;
|
||||
import okhttp3.RequestBody;
|
||||
import okhttp3.Response;
|
||||
import org.ruoyi.common.core.domain.R;
|
||||
import org.ruoyi.domain.PdfFileContentResult;
|
||||
import org.ruoyi.service.DashscopeService;
|
||||
import org.ruoyi.service.PdfImageExtractService;
|
||||
import org.ruoyi.utils.ZipUtils;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
@@ -25,20 +25,21 @@ import org.springframework.web.multipart.MultipartFile;
|
||||
/**
|
||||
* PDF图片提取服务实现类
|
||||
*/
|
||||
//@Service
|
||||
@Service
|
||||
@Slf4j
|
||||
@Data
|
||||
@AllArgsConstructor
|
||||
//public class PdfImageExtractServiceImpl implements PdfImageExtractService {
|
||||
public class PdfImageExtractServiceImpl {
|
||||
public class PdfImageExtractServiceImpl implements PdfImageExtractService {
|
||||
|
||||
// @Value("${pdf.extract.service.url}")
|
||||
@Value("${pdf.extract.service.url}")
|
||||
private String serviceUrl;
|
||||
// @Value("${pdf.extract.ai-api.url}")
|
||||
@Value("${pdf.extract.ai-api.url}")
|
||||
private String aiApiUrl;
|
||||
// @Value("${pdf.extract.ai-api.key}")
|
||||
@Value("${pdf.extract.ai-api.key}")
|
||||
private String aiApiKey;
|
||||
|
||||
@Autowired
|
||||
private DashscopeService dashscopeService;
|
||||
|
||||
private final OkHttpClient client = new Builder()
|
||||
.connectTimeout(100, TimeUnit.SECONDS)
|
||||
.readTimeout(150, TimeUnit.SECONDS)
|
||||
@@ -48,7 +49,7 @@ public class PdfImageExtractServiceImpl {
|
||||
|
||||
private static final MediaType JSON = MediaType.parse("application/json; charset=utf-8");
|
||||
|
||||
// @Override
|
||||
// @Override
|
||||
public byte[] extractImages(MultipartFile pdfFile, String imageFormat, boolean allowDuplicates)
|
||||
throws IOException {
|
||||
// 构建multipart请求
|
||||
@@ -135,7 +136,43 @@ public class PdfImageExtractServiceImpl {
|
||||
return results;
|
||||
}
|
||||
|
||||
// @Override
|
||||
/**
|
||||
* 利用百炼接口处理文件内容
|
||||
*
|
||||
* @param imageUrl 传入图片地址
|
||||
* @return 文件内容结果列表
|
||||
* @throws IOException 如果API调用过程中发生错误
|
||||
*/
|
||||
@Override
|
||||
public List<PdfFileContentResult> dealFileContent4Dashscope(String imageUrl) throws IOException {
|
||||
String qvq = dashscopeService.qvq(imageUrl);
|
||||
// 构建结果列表
|
||||
List<PdfFileContentResult> results = new ArrayList<>();
|
||||
String filename = "image_" + System.currentTimeMillis();
|
||||
results.add(new PdfFileContentResult(filename, qvq));
|
||||
return results;
|
||||
}
|
||||
|
||||
/**
|
||||
* 利用百炼接口处理文件内容
|
||||
*
|
||||
* 视觉推理(QVQ) 使用本地文件(输入Base64编码或本地路径)
|
||||
* @param localPath 图片文件的绝对路径
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public List<PdfFileContentResult> dealFileContent4DashscopeBase64(String localPath) throws IOException {
|
||||
String qvq = dashscopeService.qvq4LocalPath(localPath);
|
||||
// 构建结果列表
|
||||
List<PdfFileContentResult> results = new ArrayList<>();
|
||||
String filename = "image_" + System.currentTimeMillis();
|
||||
results.add(new PdfFileContentResult(filename, qvq));
|
||||
return results;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// @Override
|
||||
public List<PdfFileContentResult> extractImages(MultipartFile file) throws IOException {
|
||||
String format = "png";
|
||||
boolean allowDuplicates = true;
|
||||
|
||||
@@ -32,6 +32,15 @@ public interface ISysOssService {
|
||||
|
||||
String downloadByByte(Long ossId) throws IOException;
|
||||
|
||||
String downloadToTempPath(Long ossId) throws IOException;
|
||||
|
||||
Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid);
|
||||
|
||||
/**
|
||||
* 根据文件路径删除文件
|
||||
*
|
||||
* @param filePath 文件路径
|
||||
* @return 是否删除成功
|
||||
*/
|
||||
boolean deleteFile(String filePath);
|
||||
}
|
||||
|
||||
@@ -213,4 +213,48 @@ public class SysOssServiceImpl implements ISysOssService, OssService {
|
||||
}
|
||||
return oss;
|
||||
}
|
||||
@Override
|
||||
public String downloadToTempPath(Long ossId) throws IOException {
|
||||
SysOssVo sysOss = SpringUtils.getAopProxy(this).getById(ossId);
|
||||
if (ObjectUtil.isNull(sysOss)) {
|
||||
throw new ServiceException("文件数据不存在!");
|
||||
}
|
||||
|
||||
OssClient storage = OssFactory.instance();
|
||||
try (InputStream inputStream = storage.getObjectContent(sysOss.getUrl())) {
|
||||
// 创建临时文件
|
||||
String suffix = StringUtils.isNotEmpty(sysOss.getFileSuffix()) ? sysOss.getFileSuffix() : "";
|
||||
java.io.File tempFile = java.io.File.createTempFile("download_", suffix);
|
||||
// 确保临时文件在JVM退出时删除
|
||||
tempFile.deleteOnExit();
|
||||
// 将输入流内容写入临时文件
|
||||
cn.hutool.core.io.FileUtil.writeFromStream(inputStream, tempFile);
|
||||
// 返回临时文件的绝对路径
|
||||
return tempFile.getAbsolutePath();
|
||||
} catch (Exception e) {
|
||||
throw new ServiceException(e.getMessage());
|
||||
}
|
||||
}
|
||||
/**
|
||||
* 根据文件路径删除文件
|
||||
*
|
||||
* @param filePath 文件路径
|
||||
* @return 是否删除成功
|
||||
*/
|
||||
@Override
|
||||
public boolean deleteFile(String filePath) {
|
||||
if (StringUtils.isEmpty(filePath)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
try {
|
||||
java.io.File file = new java.io.File(filePath);
|
||||
if (file.exists() && file.isFile()) {
|
||||
return file.delete();
|
||||
}
|
||||
return false;
|
||||
} catch (Exception e) {
|
||||
throw new ServiceException("删除文件失败: " + e.getMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -52,7 +52,7 @@ public class KnowledgeController extends BaseController {
|
||||
|
||||
private final IKnowledgeFragmentService fragmentService;
|
||||
|
||||
// private final PdfImageExtractService pdfImageExtractService;
|
||||
private final PdfImageExtractService pdfImageExtractService;
|
||||
|
||||
/**
|
||||
* 根据用户信息查询本地知识库
|
||||
@@ -170,11 +170,11 @@ public class KnowledgeController extends BaseController {
|
||||
* @param file PDF文件
|
||||
* @return 文件名称和图片内容
|
||||
*/
|
||||
// @PostMapping("/extract-images")
|
||||
// @Operation(summary = "提取PDF中的图片并调用大模型,识别图片内容并返回", description = "提取PDF中的图片并调用gpt-4o-mini,识别图片内容并返回")
|
||||
// public R<List<PdfFileContentResult>> extractImages(
|
||||
// @RequestPart("file") MultipartFile file
|
||||
// ) throws IOException {
|
||||
// return R.ok(pdfImageExtractService.extractImages(file));
|
||||
// }
|
||||
@PostMapping("/extract-images")
|
||||
@Operation(summary = "提取PDF中的图片并调用大模型,识别图片内容并返回", description = "提取PDF中的图片并调用gpt-4o-mini,识别图片内容并返回")
|
||||
public R<List<PdfFileContentResult>> extractImages(
|
||||
) throws IOException {
|
||||
return R.ok(pdfImageExtractService
|
||||
.dealFileContent4Dashscope("https://hnzuoran02-1327573163.cos.ap-nanjing.myqcloud.com/crmebimage/public/content/2025/06/04/e115264eb22f423ea0b211709361c29f071avy39ez.jpg"));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,6 +26,7 @@ import org.ruoyi.mapper.KnowledgeAttachPicMapper;
|
||||
import org.ruoyi.mapper.KnowledgeFragmentMapper;
|
||||
import org.ruoyi.mapper.KnowledgeInfoMapper;
|
||||
import org.ruoyi.service.IChatModelService;
|
||||
import org.ruoyi.service.PdfImageExtractService;
|
||||
import org.ruoyi.service.VectorStoreService;
|
||||
import org.ruoyi.service.impl.PdfImageExtractServiceImpl;
|
||||
import org.ruoyi.system.domain.vo.SysOssVo;
|
||||
@@ -64,18 +65,10 @@ public class DealFileService {
|
||||
|
||||
private final ISysOssService ossService;
|
||||
|
||||
// private final PdfImageExtractService pdfImageExtractService;
|
||||
private final PdfImageExtractService pdfImageExtractService;
|
||||
|
||||
private final KnowledgeAttachPicMapper picMapper;
|
||||
|
||||
@Value("${pdf.extract.service.url}")
|
||||
private String serviceUrl;
|
||||
@Value("${pdf.extract.ai-api.url}")
|
||||
private String aiApiUrl;
|
||||
@Value("${pdf.extract.ai-api.key}")
|
||||
private String aiApiKey;
|
||||
|
||||
|
||||
@Async
|
||||
public void dealVectorStatus(KnowledgeAttach attachItem) throws Exception {
|
||||
try {
|
||||
@@ -169,8 +162,6 @@ public class DealFileService {
|
||||
//获取oss文件
|
||||
MultipartFile multipartFile = ossService.downloadByFile(attachItem.getOssId());
|
||||
//拆解出图片ZIP
|
||||
PdfImageExtractServiceImpl pdfImageExtractService = new PdfImageExtractServiceImpl(serviceUrl,
|
||||
aiApiUrl, aiApiKey);
|
||||
byte[] pngs = pdfImageExtractService.extractImages(multipartFile, "png", true);
|
||||
//解压zip,得到图片文件
|
||||
MultipartFile[] multipartFiles = ZipUtils.unzipToMultipartFiles(pngs);
|
||||
@@ -236,6 +227,7 @@ public class DealFileService {
|
||||
|
||||
@Async
|
||||
public void dealPicAnysStatus(KnowledgeAttachPic picItem) throws Exception {
|
||||
String filePath = null;
|
||||
try {
|
||||
//锁定数据 更改 getPicAnysStatus 到进行中
|
||||
if (picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>()
|
||||
@@ -247,13 +239,10 @@ public class DealFileService {
|
||||
}
|
||||
SysOssVo ossVo = ossService.getById(picItem.getOssId());
|
||||
if (ObjectUtil.isNotEmpty(ossVo)) {
|
||||
String fileStr = ossService.downloadByByte(picItem.getOssId());
|
||||
filePath = ossService.downloadToTempPath(picItem.getOssId());
|
||||
//调用第三方 分析图片内容
|
||||
PdfImageExtractServiceImpl pdfImageExtractService = new PdfImageExtractServiceImpl(
|
||||
serviceUrl,
|
||||
aiApiUrl, aiApiKey);
|
||||
List<PdfFileContentResult> pdfFileContentResults = pdfImageExtractService.dealFileContent(
|
||||
new String[]{fileStr});
|
||||
List<PdfFileContentResult> pdfFileContentResults = pdfImageExtractService.dealFileContent4DashscopeBase64(
|
||||
filePath);
|
||||
if (ObjectUtil.isNotEmpty(pdfFileContentResults)) {
|
||||
for (PdfFileContentResult resultItem : pdfFileContentResults) {
|
||||
//图片解析内容回写到pic表
|
||||
@@ -302,6 +291,11 @@ public class DealFileService {
|
||||
.eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_20)
|
||||
.eq(KnowledgeAttachPic::getId, picItem.getId()));
|
||||
throw new RuntimeException(e);
|
||||
} finally {
|
||||
//无论成功还是失败,都要删除临时文件
|
||||
if (ObjectUtil.isNotEmpty(filePath)) {
|
||||
ossService.deleteFile(filePath);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -349,35 +343,32 @@ public class DealFileService {
|
||||
return null;
|
||||
}
|
||||
|
||||
public static String parseContent(String jsonString) {
|
||||
public static String parseContent(String content) {
|
||||
try {
|
||||
// 创建ObjectMapper实例
|
||||
// 首先尝试作为JSON解析
|
||||
ObjectMapper objectMapper = new ObjectMapper();
|
||||
JsonNode rootNode = objectMapper.readTree(content);
|
||||
|
||||
// 解析JSON字符串
|
||||
JsonNode rootNode = objectMapper.readTree(jsonString);
|
||||
|
||||
// 获取choices数组的第一个元素
|
||||
// 如果是JSON格式,按原有逻辑处理
|
||||
JsonNode choicesNode = rootNode.get("choices");
|
||||
if (choicesNode != null && choicesNode.isArray() && choicesNode.size() > 0) {
|
||||
// 获取第一个choice
|
||||
JsonNode firstChoice = choicesNode.get(0);
|
||||
|
||||
// 获取message节点
|
||||
JsonNode messageNode = firstChoice.get("message");
|
||||
if (messageNode != null) {
|
||||
// 获取content字段的值
|
||||
JsonNode contentNode = messageNode.get("content");
|
||||
if (contentNode != null) {
|
||||
return contentNode.asText();
|
||||
}
|
||||
}
|
||||
return "无法找到content内容";
|
||||
}
|
||||
|
||||
return "无法找到content内容";
|
||||
|
||||
// 如果不是预期的JSON格式,直接返回原始内容
|
||||
return content;
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
return "解析JSON时发生错误: " + e.getMessage();
|
||||
// 如果解析JSON失败,说明是普通文本,直接返回
|
||||
return content;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -57,7 +57,6 @@ import org.ruoyi.system.service.ISysOssService;
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
* 知识库Service业务层处理
|
||||
*
|
||||
@@ -83,7 +82,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
|
||||
private final ISysOssService ossService;
|
||||
|
||||
// private final PdfImageExtractService pdfImageExtractService;
|
||||
// private final PdfImageExtractService pdfImageExtractService;
|
||||
|
||||
private final KnowledgeAttachPicMapper picMapper;
|
||||
|
||||
@@ -170,7 +169,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
* 保存前的数据校验
|
||||
*/
|
||||
private void validEntityBeforeSave(KnowledgeInfo entity) {
|
||||
//TODO 做一些数据校验,如唯一约束
|
||||
// TODO 做一些数据校验,如唯一约束
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -179,7 +178,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
@Override
|
||||
public Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid) {
|
||||
if (isValid) {
|
||||
//TODO 做一些业务上的校验,判断是否需要校验
|
||||
// TODO 做一些业务上的校验,判断是否需要校验
|
||||
}
|
||||
return baseMapper.deleteBatchIds(ids) > 0;
|
||||
}
|
||||
@@ -223,10 +222,10 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
Collection<Long> ossIds = knowledgeAttachVos.stream()
|
||||
.map(KnowledgeAttachVo::getOssId)
|
||||
.collect(Collectors.toList());
|
||||
//删除oss
|
||||
// 删除oss
|
||||
ossService.deleteWithValidByIds(ossIds, false);
|
||||
|
||||
//删除图片oss
|
||||
// 删除图片oss
|
||||
List<KnowledgeAttachPic> knowledgeAttachPics = picMapper.selectList(
|
||||
new LambdaQueryWrapper<KnowledgeAttachPic>()
|
||||
.in(KnowledgeAttachPic::getKid,
|
||||
@@ -234,8 +233,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
.collect(Collectors.toList()))
|
||||
.in(KnowledgeAttachPic::getAid,
|
||||
knowledgeAttachVos.stream().map(KnowledgeAttachVo::getId)
|
||||
.collect(Collectors.toList()))
|
||||
);
|
||||
.collect(Collectors.toList())));
|
||||
if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) {
|
||||
Collection<Long> tossIds = knowledgeAttachPics.stream()
|
||||
.map(KnowledgeAttachPic::getOssId)
|
||||
@@ -302,7 +300,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
knowledgeAttach.setCreateTime(new Date());
|
||||
if (ObjectUtil.isNotEmpty(uploadDto) && ObjectUtil.isNotEmpty(uploadDto.getOssId())) {
|
||||
knowledgeAttach.setOssId(uploadDto.getOssId());
|
||||
//只有pdf文件 才需要拆解图片和分析图片内容
|
||||
// 只有pdf文件 才需要拆解图片和分析图片内容
|
||||
if (FileType.PDF.equals(knowledgeAttach.getDocType())) {
|
||||
knowledgeAttach.setPicStatus(DealStatus.STATUS_10);
|
||||
knowledgeAttach.setPicAnysStatus(DealStatus.STATUS_10);
|
||||
@@ -310,7 +308,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
knowledgeAttach.setPicStatus(DealStatus.STATUS_30);
|
||||
knowledgeAttach.setPicAnysStatus(DealStatus.STATUS_30);
|
||||
}
|
||||
//所有文件上传后,都需要同步到向量数据库
|
||||
// 所有文件上传后,都需要同步到向量数据库
|
||||
knowledgeAttach.setVectorStatus(DealStatus.STATUS_10);
|
||||
}
|
||||
attachMapper.insert(knowledgeAttach);
|
||||
@@ -334,15 +332,14 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
/**
|
||||
* 第一步 定时 拆解PDF文件中的图片
|
||||
*/
|
||||
//@Scheduled(fixedDelay = 15000) // 每3秒执行一次
|
||||
@Scheduled(fixedDelay = 15000) // runs 15 seconds after the previous run finishes
|
||||
public void dealKnowledgeAttachPic() throws Exception {
|
||||
//处理 拆解PDF文件中的图片的记录
|
||||
// 处理 拆解PDF文件中的图片的记录
|
||||
List<KnowledgeAttach> knowledgeAttaches = attachMapper.selectList(
|
||||
new LambdaQueryWrapper<KnowledgeAttach>()
|
||||
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
|
||||
);
|
||||
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10));
|
||||
log.info("===============拆解PDF文件中的图片 size = {}", knowledgeAttaches.size());
|
||||
if (ObjectUtil.isNotEmpty(knowledgeAttaches)) {
|
||||
for (KnowledgeAttach attachItem : knowledgeAttaches) {
|
||||
@@ -356,30 +353,29 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
*/
|
||||
@Scheduled(fixedDelay = 15000)
|
||||
public void dealKnowledgeAttachPicAnys() throws Exception {
|
||||
//获取未处理的图片记录
|
||||
// 获取未处理的图片记录
|
||||
List<KnowledgeAttachPic> knowledgeAttachPics = picMapper.selectList(
|
||||
new LambdaQueryWrapper<KnowledgeAttachPic>()
|
||||
.eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10)
|
||||
.last("LIMIT 20")
|
||||
);
|
||||
.last("LIMIT 20"));
|
||||
if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) {
|
||||
for (KnowledgeAttachPic picItem : knowledgeAttachPics) {
|
||||
dealFileService.dealPicAnysStatus(picItem);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 第三步 定时 处理 附件上传后上传向量数据库
|
||||
*/
|
||||
@Scheduled(fixedDelay = 30000) // runs 30 seconds after the previous run finishes
|
||||
public void dealKnowledgeAttachVector() throws Exception {
|
||||
//处理 需要上传向量数据库的记录
|
||||
// 处理 需要上传向量数据库的记录
|
||||
List<KnowledgeAttach> knowledgeAttaches = attachMapper.selectList(
|
||||
new LambdaQueryWrapper<KnowledgeAttach>()
|
||||
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
|
||||
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
|
||||
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
|
||||
);
|
||||
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10));
|
||||
log.info("===============上传向量数据库 size = {}", knowledgeAttaches.size());
|
||||
if (ObjectUtil.isNotEmpty(knowledgeAttaches)) {
|
||||
for (KnowledgeAttach attachItem : knowledgeAttaches) {
|
||||
@@ -387,23 +383,24 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 第四步 定时 处理 失败数据
|
||||
*/
|
||||
@Scheduled(fixedDelay = 30 * 60 * 1000)
|
||||
public void dealKnowledge40Status() throws Exception {
|
||||
//拆解PDF失败 重新设置状态
|
||||
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
|
||||
.set(KnowledgeAttach::getPicStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_40));
|
||||
//将图片分析失败的数据 重新设置状态
|
||||
picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>()
|
||||
.set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_40));
|
||||
//上传向量库失败 重新设置状态
|
||||
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
|
||||
.set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_40));
|
||||
// 拆解PDF失败 重新设置状态
|
||||
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
|
||||
.set(KnowledgeAttach::getPicStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_40));
|
||||
// 将图片分析失败的数据 重新设置状态
|
||||
picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>()
|
||||
.set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_40));
|
||||
// 上传向量库失败 重新设置状态
|
||||
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
|
||||
.set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_40));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
2
script/sql/update/202506041541.sql
Normal file
2
script/sql/update/202506041541.sql
Normal file
@@ -0,0 +1,2 @@
|
||||
ALTER TABLE `knowledge_attach`
|
||||
MODIFY COLUMN `oss_id` bigint(20) NULL DEFAULT NULL COMMENT '对象存储ID' AFTER `doc_type`;
|
||||
Reference in New Issue
Block a user