feat: 集成阿里百炼API实现图片内容识别功能

添加DashscopeService接口及实现,用于调用阿里百炼API进行图片内容识别
修改PdfImageExtractService增加基于百炼API的图片处理逻辑
新增OSS服务方法支持临时文件处理和删除
更新配置文件添加百炼模型相关配置
This commit is contained in:
zhouweiyi
2025-06-04 17:55:47 +08:00
parent 53e3180658
commit 1d51a103d0
13 changed files with 472 additions and 83 deletions

View File

@@ -0,0 +1,111 @@
--- # 监控中心配置
spring.boot.admin.client:
# 增加客户端开关
enabled: false
url: http://localhost:9090/admin
instance:
service-host-type: IP
username: ruoyi
password: 123456
--- # 数据源配置
spring:
datasource:
type: com.zaxxer.hikari.HikariDataSource
# 动态数据源文档 https://www.kancloud.cn/tracy5546/dynamic-datasource/content
dynamic:
# 性能分析插件(有性能损耗 不建议生产环境使用)
p6spy: true
# 设置默认的数据源或者数据源组,默认值即为 master
primary: master
# 严格模式 匹配不到数据源则报错
strict: true
datasource:
# 主库数据源
master:
type: ${spring.datasource.type}
driverClassName: com.mysql.cj.jdbc.Driver
url: jdbc:mysql://localhost:3306/ruoyi-ai?useUnicode=true&characterEncoding=utf8&zeroDateTimeBehavior=convertToNull&useSSL=true&serverTimezone=GMT%2B8&autoReconnect=true&rewriteBatchedStatements=true
username: root
password: root
hikari:
# 最大连接池数量
maxPoolSize: 20
# 最小空闲线程数量
minIdle: 10
# 配置获取连接等待超时的时间
connectionTimeout: 30000
# 校验超时时间
validationTimeout: 5000
# 空闲连接存活最大时间默认10分钟
idleTimeout: 600000
# 此属性控制池中连接的最长生命周期值0表示无限生命周期默认30分钟
maxLifetime: 1800000
# 连接测试query配置检测连接是否有效
connectionTestQuery: SELECT 1
# 多久检查一次连接的活性
keepaliveTime: 30000
--- # redis 单机配置(单机与集群只能开启一个另一个需要注释掉)
spring.data:
redis:
# 地址
host: 127.0.0.1
# 端口默认为6379
port: 6379
# 数据库索引
database: 0
# 密码(如没有密码请注释掉)
password: root
# 连接超时时间
timeout: 10S
# 是否开启ssl
ssl: false
redisson:
# redis key前缀
keyPrefix:
# 线程池数量
threads: 4
# Netty线程池数量
nettyThreads: 8
# 单节点配置
singleServerConfig:
# 客户端名称
clientName: ${ruoyi.name}
# 最小空闲连接数
connectionMinimumIdleSize: 8
# 连接池大小
connectionPoolSize: 32
# 连接空闲超时,单位:毫秒
idleConnectionTimeout: 10000
# 命令等待超时,单位:毫秒
timeout: 3000
# 发布和订阅连接池大小
subscriptionConnectionPoolSize: 50
--- # sms 短信
sms:
enabled: false
# 阿里云 dysmsapi.aliyuncs.com
# 腾讯云 sms.tencentcloudapi.com
endpoint: "dysmsapi.aliyuncs.com"
accessKeyId: xxxxxxx
accessKeySecret: xxxxxx
signName: 测试
# 腾讯专用
sdkAppId:
pdf:
extract:
service:
url: http://localhost:8080
ai-api:
url: https://api.pandarobot.chat/v1/chat/completions
key: sk-xxxx
#百炼模型配置
dashscope:
key: sk-XXXX
model: qvq-max

View File

@@ -179,4 +179,8 @@ pdf:
url: http://localhost:8080
ai-api:
url: https://api.pandarobot.chat/v1/chat/completions
key: sk-XXXXXX
key: sk-XXXXXX
#百炼模型配置
dashscope:
key: sk-XXXX
model: qvq-max

View File

@@ -119,6 +119,12 @@
<artifactId>ruoyi-system-api</artifactId>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>dashscope-sdk-java</artifactId>
<version>2.19.0</version>
</dependency>
</dependencies>
</project>

View File

@@ -0,0 +1,23 @@
package org.ruoyi.service;
import java.io.IOException;
/**
* Client contract for the Alibaba Bailian (DashScope) API.
*
* @Date: 2025/6/4 2:24 PM
*/
public interface DashscopeService {
/**
* Runs QVQ visual reasoning on an image that is reachable by URL.
*
* @param imageUrl accessible address of the image
* @return the text the model produced for the image
* @throws IOException if the API call fails
*/
String qvq(String imageUrl) throws IOException;
/**
* Runs QVQ visual reasoning on a local image file
* (original note: local input may be supplied as Base64 or as a local path).
*
* @param localPath absolute path of the image file
* @return the text the model produced for the image
* @throws IOException if the API call fails
*/
String qvq4LocalPath(String localPath) throws IOException;
}

View File

@@ -30,7 +30,23 @@ public interface PdfImageExtractService {
* @throws IOException 如果API调用过程中发生错误
*/
List<PdfFileContentResult> dealFileContent(String[] unzip) throws IOException;
/**
* Processes image content through the Bailian (DashScope) API.
*
* @param imageUrl address of the image to analyse
* @return list of file-content results
* @throws IOException if the API call fails
*/
List<PdfFileContentResult> dealFileContent4Dashscope(String imageUrl) throws IOException;
/**
* Processes image content through the Bailian (DashScope) API using a local file as input
* (QVQ visual reasoning; original note: local input may be Base64 or a local path).
*
* @param localPath absolute path of the image file
* @return list of file-content results
* @throws IOException if the API call fails
*/
List<PdfFileContentResult> dealFileContent4DashscopeBase64(String localPath)throws IOException;
/**
* 提取PDF中的图片并调用gpt-4o-mini,识别图片内容并返回
* @param file

View File

@@ -0,0 +1,149 @@
package org.ruoyi.service.impl;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversation;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationParam;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationResult;
import com.alibaba.dashscope.common.MultiModalMessage;
import com.alibaba.dashscope.common.Role;
import io.reactivex.Flowable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import lombok.extern.slf4j.Slf4j;
import org.ruoyi.domain.PdfFileContentResult;
import org.ruoyi.service.DashscopeService;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
/**
 * Alibaba Bailian (DashScope) API client used to describe the content of an image.
 *
 * <p>All per-request buffers are local to the call. The previous version kept them in static
 * fields, so concurrent requests on this shared bean could overwrite or mix each other's
 * streamed answers.
 *
 * @Date: 2025/6/4 2:28 PM
 */
@Service
@Slf4j
public class DashscopeServiceImpl implements DashscopeService {

    /** Prompt sent together with every image ("what is in this picture"). */
    private static final String IMAGE_PROMPT = "这张图片有什么";

    /** Model name, injected from the {@code dashscope.model} property. */
    @Value("${dashscope.model}")
    private String serviceModel;

    /** API key, injected from the {@code dashscope.key} property. */
    @Value("${dashscope.key}")
    private String serviceKey;

    /**
     * Runs QVQ visual reasoning on an image that is reachable by URL.
     *
     * @param imageUrl accessible address of the image
     * @return the streamed answer text with line feeds, carriage returns and spaces removed
     * @throws IOException if the DashScope call fails for any reason
     */
    @Override
    public String qvq(String imageUrl) throws IOException {
        return describeImage(Arrays.asList(
            Collections.singletonMap("text", IMAGE_PROMPT),
            Collections.singletonMap("image", imageUrl)
        ));
    }

    /**
     * Runs QVQ visual reasoning on a local image file, passed to the SDK as a {@code file://} URI.
     *
     * @param localPath absolute path of the image file
     * @return the streamed answer text with line feeds, carriage returns and spaces removed
     * @throws IOException if the DashScope call fails for any reason
     */
    @Override
    public String qvq4LocalPath(String localPath) throws IOException {
        String filePath = "file://" + localPath;
        log.info("filePath: {}", filePath);

        // Plain HashMaps replace the double-brace initializers, which created anonymous
        // subclasses holding a reference to this service. The entries stay mutable as before,
        // in case the SDK rewrites the local-path entry while preparing the request
        // (NOTE(review): confirm against the SDK).
        Map<String, Object> imagePart = new HashMap<>();
        imagePart.put("image", filePath);
        Map<String, Object> textPart = new HashMap<>();
        textPart.put("text", IMAGE_PROMPT);

        return describeImage(Arrays.asList(imagePart, textPart));
    }

    /**
     * Sends one user message with the given multimodal content and collects the streamed answer.
     *
     * @param content ordered content parts (text / image entries) of the user message
     * @return the concatenated answer text with line feeds, carriage returns and spaces removed
     * @throws IOException wrapping any failure raised while building or executing the request
     */
    private String describeImage(List<Map<String, Object>> content) throws IOException {
        try {
            MultiModalMessage userMessage = MultiModalMessage.builder()
                .role(Role.USER.getValue())
                .content(content)
                .build();

            MultiModalConversationParam param = MultiModalConversationParam.builder()
                .apiKey(serviceKey) // API key from the configuration file
                .model(serviceModel)
                .message(userMessage)
                .build();

            MultiModalConversation conv = new MultiModalConversation();
            Flowable<MultiModalConversationResult> stream = conv.streamCall(param);

            // Request-scoped buffers: nothing is shared between concurrent calls.
            StringBuilder reasoning = new StringBuilder();
            StringBuilder answer = new StringBuilder();
            stream.blockingForEach(chunk -> collectChunk(chunk, reasoning, answer));

            if (reasoning.length() > 0) {
                log.debug("DashScope reasoning content: {}", reasoning);
            }
            // "\s" is the Java 15+ string escape for a single space, so this strips LF, CR and
            // space characters (not tabs), exactly as the original expression did.
            return answer.toString().replaceAll("[\n\r\s]", "");
        } catch (Exception e) {
            log.error("调用百炼API失败: {}", e.getMessage(), e);
            throw new IOException("百炼API调用失败: " + e.getMessage(), e);
        }
    }

    /**
     * Appends the reasoning and answer fragments carried by one streamed chunk.
     *
     * @param chunk     streamed result chunk
     * @param reasoning buffer receiving the model's reasoning text
     * @param answer    buffer receiving the model's final answer text
     */
    private static void collectChunk(MultiModalConversationResult chunk,
                                     StringBuilder reasoning, StringBuilder answer) {
        if (chunk.getOutput() == null
            || chunk.getOutput().getChoices() == null
            || chunk.getOutput().getChoices().isEmpty()) {
            // A chunk without choices carries no text to collect.
            return;
        }
        var message = chunk.getOutput().getChoices().get(0).getMessage();
        if (message == null) {
            return;
        }

        String reasoningPart = message.getReasoningContent();
        if (reasoningPart != null) {
            reasoning.append(reasoningPart);
        }

        List<Map<String, Object>> content = message.getContent();
        if (content != null && !content.isEmpty()) {
            Object text = content.get(0).get("text");
            // Skip parts without a "text" entry instead of appending the literal "null".
            if (Objects.nonNull(text)) {
                answer.append(text);
            }
        }
    }
}

View File

@@ -4,7 +4,6 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import okhttp3.MediaType;
@@ -14,10 +13,11 @@ import okhttp3.OkHttpClient.Builder;
import okhttp3.Request;
import okhttp3.RequestBody;
import okhttp3.Response;
import org.ruoyi.common.core.domain.R;
import org.ruoyi.domain.PdfFileContentResult;
import org.ruoyi.service.DashscopeService;
import org.ruoyi.service.PdfImageExtractService;
import org.ruoyi.utils.ZipUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
@@ -25,20 +25,21 @@ import org.springframework.web.multipart.MultipartFile;
/**
* PDF图片提取服务实现类
*/
//@Service
@Service
@Slf4j
@Data
@AllArgsConstructor
//public class PdfImageExtractServiceImpl implements PdfImageExtractService {
public class PdfImageExtractServiceImpl {
public class PdfImageExtractServiceImpl implements PdfImageExtractService {
// @Value("${pdf.extract.service.url}")
@Value("${pdf.extract.service.url}")
private String serviceUrl;
// @Value("${pdf.extract.ai-api.url}")
@Value("${pdf.extract.ai-api.url}")
private String aiApiUrl;
// @Value("${pdf.extract.ai-api.key}")
@Value("${pdf.extract.ai-api.key}")
private String aiApiKey;
@Autowired
private DashscopeService dashscopeService;
private final OkHttpClient client = new Builder()
.connectTimeout(100, TimeUnit.SECONDS)
.readTimeout(150, TimeUnit.SECONDS)
@@ -48,7 +49,7 @@ public class PdfImageExtractServiceImpl {
private static final MediaType JSON = MediaType.parse("application/json; charset=utf-8");
// @Override
// @Override
public byte[] extractImages(MultipartFile pdfFile, String imageFormat, boolean allowDuplicates)
throws IOException {
// 构建multipart请求
@@ -135,7 +136,43 @@ public class PdfImageExtractServiceImpl {
return results;
}
// @Override
/**
 * Describes an image through the Bailian (DashScope) service and wraps the answer in a
 * single-element result list.
 *
 * @param imageUrl address of the image to analyse
 * @return a list holding one result whose name is {@code image_<current millis>}
 * @throws IOException if the API call fails
 */
@Override
public List<PdfFileContentResult> dealFileContent4Dashscope(String imageUrl) throws IOException {
    final String recognizedText = dashscopeService.qvq(imageUrl);
    // The name is generated from the current timestamp; it is not derived from the URL.
    final List<PdfFileContentResult> wrapped = new ArrayList<>();
    wrapped.add(new PdfFileContentResult("image_" + System.currentTimeMillis(), recognizedText));
    return wrapped;
}
/**
 * Describes a local image file through the Bailian (DashScope) service and wraps the answer in
 * a single-element result list.
 *
 * @param localPath absolute path of the image file
 * @return a list holding one result whose name is {@code image_<current millis>}
 * @throws IOException if the API call fails
 */
@Override
public List<PdfFileContentResult> dealFileContent4DashscopeBase64(String localPath) throws IOException {
    final String recognizedText = dashscopeService.qvq4LocalPath(localPath);
    // The name is generated from the current timestamp; it is not derived from the file path.
    final List<PdfFileContentResult> wrapped = new ArrayList<>();
    wrapped.add(new PdfFileContentResult("image_" + System.currentTimeMillis(), recognizedText));
    return wrapped;
}
// @Override
public List<PdfFileContentResult> extractImages(MultipartFile file) throws IOException {
String format = "png";
boolean allowDuplicates = true;

View File

@@ -32,6 +32,15 @@ public interface ISysOssService {
String downloadByByte(Long ossId) throws IOException;
/**
* Downloads the stored object identified by the given OSS id into a temporary local file.
*
* @param ossId id of the OSS record
* @return absolute path of the temporary file
* @throws IOException declared for I/O failures
*/
String downloadToTempPath(Long ossId) throws IOException;
Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid);
/**
* Deletes the file at the given path.
*
* @param filePath path of the file
* @return whether the file was deleted
*/
boolean deleteFile(String filePath);
}

View File

@@ -213,4 +213,48 @@ public class SysOssServiceImpl implements ISysOssService, OssService {
}
return oss;
}
/**
 * Downloads the stored object of the given OSS record into a temporary local file.
 *
 * <p>The file is registered for deletion at JVM exit. If the download or write fails, the
 * partially written temp file is removed immediately instead of being left on disk.
 *
 * @param ossId id of the OSS record
 * @return absolute path of the temporary file
 * @throws ServiceException if the record does not exist or the download fails
 * @throws IOException declared by the interface
 */
@Override
public String downloadToTempPath(Long ossId) throws IOException {
    SysOssVo sysOss = SpringUtils.getAopProxy(this).getById(ossId);
    if (ObjectUtil.isNull(sysOss)) {
        throw new ServiceException("文件数据不存在!");
    }
    OssClient storage = OssFactory.instance();
    java.io.File tempFile = null;
    try (InputStream inputStream = storage.getObjectContent(sysOss.getUrl())) {
        // Reuse the stored suffix (if any) for the temp file name.
        String suffix = StringUtils.isNotEmpty(sysOss.getFileSuffix()) ? sysOss.getFileSuffix() : "";
        tempFile = java.io.File.createTempFile("download_", suffix);
        // Safety net: remove the file at JVM exit even if the caller never deletes it.
        tempFile.deleteOnExit();
        cn.hutool.core.io.FileUtil.writeFromStream(inputStream, tempFile);
        return tempFile.getAbsolutePath();
    } catch (Exception e) {
        // Best-effort cleanup so a failed download does not leave a partial file behind.
        if (tempFile != null && tempFile.exists()) {
            tempFile.delete();
        }
        ServiceException failure = new ServiceException(e.getMessage());
        // Keep the original exception as the cause so its stack trace is not lost.
        failure.initCause(e);
        throw failure;
    }
}
/**
 * Deletes the regular file at the given path.
 *
 * @param filePath path of the file
 * @return {@code true} only when the path is non-empty, points to an existing regular file and
 *         the deletion succeeded
 * @throws ServiceException if checking or deleting the file raises an exception
 */
@Override
public boolean deleteFile(String filePath) {
    if (StringUtils.isEmpty(filePath)) {
        return false;
    }
    try {
        java.io.File target = new java.io.File(filePath);
        // Directories and missing paths are never deleted.
        boolean deletable = target.exists() && target.isFile();
        return deletable && target.delete();
    } catch (Exception e) {
        throw new ServiceException("删除文件失败: " + e.getMessage());
    }
}
}

View File

@@ -52,7 +52,7 @@ public class KnowledgeController extends BaseController {
private final IKnowledgeFragmentService fragmentService;
// private final PdfImageExtractService pdfImageExtractService;
private final PdfImageExtractService pdfImageExtractService;
/**
* 根据用户信息查询本地知识库
@@ -170,11 +170,11 @@ public class KnowledgeController extends BaseController {
* @param file PDF文件
* @return 文件名称和图片内容
*/
// @PostMapping("/extract-images")
// @Operation(summary = "提取PDF中的图片并调用大模型,识别图片内容并返回", description = "提取PDF中的图片并调用gpt-4o-mini,识别图片内容并返回")
// public R<List<PdfFileContentResult>> extractImages(
// @RequestPart("file") MultipartFile file
// ) throws IOException {
// return R.ok(pdfImageExtractService.extractImages(file));
// }
@PostMapping("/extract-images")
@Operation(summary = "提取PDF中的图片并调用大模型,识别图片内容并返回", description = "提取PDF中的图片并调用gpt-4o-mini,识别图片内容并返回")
public R<List<PdfFileContentResult>> extractImages(
) throws IOException {
// NOTE(review): this endpoint accepts no upload and always analyses the hard-coded image URL
// below through the DashScope path, while the @Operation text still describes PDF image
// extraction with gpt-4o-mini. Looks like leftover test code; confirm before release.
return R.ok(pdfImageExtractService
.dealFileContent4Dashscope("https://hnzuoran02-1327573163.cos.ap-nanjing.myqcloud.com/crmebimage/public/content/2025/06/04/e115264eb22f423ea0b211709361c29f071avy39ez.jpg"));
}
}

View File

@@ -26,6 +26,7 @@ import org.ruoyi.mapper.KnowledgeAttachPicMapper;
import org.ruoyi.mapper.KnowledgeFragmentMapper;
import org.ruoyi.mapper.KnowledgeInfoMapper;
import org.ruoyi.service.IChatModelService;
import org.ruoyi.service.PdfImageExtractService;
import org.ruoyi.service.VectorStoreService;
import org.ruoyi.service.impl.PdfImageExtractServiceImpl;
import org.ruoyi.system.domain.vo.SysOssVo;
@@ -64,18 +65,10 @@ public class DealFileService {
private final ISysOssService ossService;
// private final PdfImageExtractService pdfImageExtractService;
private final PdfImageExtractService pdfImageExtractService;
private final KnowledgeAttachPicMapper picMapper;
@Value("${pdf.extract.service.url}")
private String serviceUrl;
@Value("${pdf.extract.ai-api.url}")
private String aiApiUrl;
@Value("${pdf.extract.ai-api.key}")
private String aiApiKey;
@Async
public void dealVectorStatus(KnowledgeAttach attachItem) throws Exception {
try {
@@ -169,8 +162,6 @@ public class DealFileService {
//获取oss文件
MultipartFile multipartFile = ossService.downloadByFile(attachItem.getOssId());
//拆解出图片ZIP
PdfImageExtractServiceImpl pdfImageExtractService = new PdfImageExtractServiceImpl(serviceUrl,
aiApiUrl, aiApiKey);
byte[] pngs = pdfImageExtractService.extractImages(multipartFile, "png", true);
//解压zip得到图片文件
MultipartFile[] multipartFiles = ZipUtils.unzipToMultipartFiles(pngs);
@@ -236,6 +227,7 @@ public class DealFileService {
@Async
public void dealPicAnysStatus(KnowledgeAttachPic picItem) throws Exception {
String filePath = null;
try {
//锁定数据 更改 getPicAnysStatus 到进行中
if (picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>()
@@ -247,13 +239,10 @@ public class DealFileService {
}
SysOssVo ossVo = ossService.getById(picItem.getOssId());
if (ObjectUtil.isNotEmpty(ossVo)) {
String fileStr = ossService.downloadByByte(picItem.getOssId());
filePath = ossService.downloadToTempPath(picItem.getOssId());
//调用第三方 分析图片内容
PdfImageExtractServiceImpl pdfImageExtractService = new PdfImageExtractServiceImpl(
serviceUrl,
aiApiUrl, aiApiKey);
List<PdfFileContentResult> pdfFileContentResults = pdfImageExtractService.dealFileContent(
new String[]{fileStr});
List<PdfFileContentResult> pdfFileContentResults = pdfImageExtractService.dealFileContent4DashscopeBase64(
filePath);
if (ObjectUtil.isNotEmpty(pdfFileContentResults)) {
for (PdfFileContentResult resultItem : pdfFileContentResults) {
//图片解析内容回写到pic表
@@ -302,6 +291,11 @@ public class DealFileService {
.eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_20)
.eq(KnowledgeAttachPic::getId, picItem.getId()));
throw new RuntimeException(e);
} finally {
//无论成功还是失败,都要删除临时文件
if (ObjectUtil.isNotEmpty(filePath)) {
ossService.deleteFile(filePath);
}
}
}
@@ -349,35 +343,32 @@ public class DealFileService {
return null;
}
public static String parseContent(String jsonString) {
public static String parseContent(String content) {
try {
// 创建ObjectMapper实例
// 首先尝试作为JSON解析
ObjectMapper objectMapper = new ObjectMapper();
JsonNode rootNode = objectMapper.readTree(content);
// 解析JSON字符串
JsonNode rootNode = objectMapper.readTree(jsonString);
// 获取choices数组的第一个元素
// 如果是JSON格式,按原有逻辑处理
JsonNode choicesNode = rootNode.get("choices");
if (choicesNode != null && choicesNode.isArray() && choicesNode.size() > 0) {
// 获取第一个choice
JsonNode firstChoice = choicesNode.get(0);
// 获取message节点
JsonNode messageNode = firstChoice.get("message");
if (messageNode != null) {
// 获取content字段的值
JsonNode contentNode = messageNode.get("content");
if (contentNode != null) {
return contentNode.asText();
}
}
return "无法找到content内容";
}
return "无法找到content内容";
// 如果不是预期的JSON格式直接返回原始内容
return content;
} catch (Exception e) {
e.printStackTrace();
return "解析JSON时发生错误: " + e.getMessage();
// 如果解析JSON失败说明是普通文本直接返回
return content;
}
}

View File

@@ -57,7 +57,6 @@ import org.ruoyi.system.service.ISysOssService;
import java.io.IOException;
import java.util.*;
/**
* 知识库Service业务层处理
*
@@ -83,7 +82,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
private final ISysOssService ossService;
// private final PdfImageExtractService pdfImageExtractService;
// private final PdfImageExtractService pdfImageExtractService;
private final KnowledgeAttachPicMapper picMapper;
@@ -170,7 +169,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
* 保存前的数据校验
*/
private void validEntityBeforeSave(KnowledgeInfo entity) {
//TODO 做一些数据校验,如唯一约束
// TODO 做一些数据校验,如唯一约束
}
/**
@@ -179,7 +178,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
@Override
public Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid) {
if (isValid) {
//TODO 做一些业务上的校验,判断是否需要校验
// TODO 做一些业务上的校验,判断是否需要校验
}
return baseMapper.deleteBatchIds(ids) > 0;
}
@@ -223,10 +222,10 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
Collection<Long> ossIds = knowledgeAttachVos.stream()
.map(KnowledgeAttachVo::getOssId)
.collect(Collectors.toList());
//删除oss
// 删除oss
ossService.deleteWithValidByIds(ossIds, false);
//删除图片oss
// 删除图片oss
List<KnowledgeAttachPic> knowledgeAttachPics = picMapper.selectList(
new LambdaQueryWrapper<KnowledgeAttachPic>()
.in(KnowledgeAttachPic::getKid,
@@ -234,8 +233,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
.collect(Collectors.toList()))
.in(KnowledgeAttachPic::getAid,
knowledgeAttachVos.stream().map(KnowledgeAttachVo::getId)
.collect(Collectors.toList()))
);
.collect(Collectors.toList())));
if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) {
Collection<Long> tossIds = knowledgeAttachPics.stream()
.map(KnowledgeAttachPic::getOssId)
@@ -302,7 +300,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
knowledgeAttach.setCreateTime(new Date());
if (ObjectUtil.isNotEmpty(uploadDto) && ObjectUtil.isNotEmpty(uploadDto.getOssId())) {
knowledgeAttach.setOssId(uploadDto.getOssId());
//只有pdf文件 才需要拆解图片和分析图片内容
// 只有pdf文件 才需要拆解图片和分析图片内容
if (FileType.PDF.equals(knowledgeAttach.getDocType())) {
knowledgeAttach.setPicStatus(DealStatus.STATUS_10);
knowledgeAttach.setPicAnysStatus(DealStatus.STATUS_10);
@@ -310,7 +308,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
knowledgeAttach.setPicStatus(DealStatus.STATUS_30);
knowledgeAttach.setPicAnysStatus(DealStatus.STATUS_30);
}
//所有文件上传后,都需要同步到向量数据库
// 所有文件上传后,都需要同步到向量数据库
knowledgeAttach.setVectorStatus(DealStatus.STATUS_10);
}
attachMapper.insert(knowledgeAttach);
@@ -334,15 +332,14 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
/**
* 第一步 定时 拆解PDF文件中的图片
*/
//@Scheduled(fixedDelay = 15000) // 每3秒执行一次
@Scheduled(fixedDelay = 15000) // runs again 15 seconds after the previous execution finishes
public void dealKnowledgeAttachPic() throws Exception {
//处理 拆解PDF文件中的图片的记录
// 处理 拆解PDF文件中的图片的记录
List<KnowledgeAttach> knowledgeAttaches = attachMapper.selectList(
new LambdaQueryWrapper<KnowledgeAttach>()
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
);
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10));
log.info("===============拆解PDF文件中的图片 size = {}", knowledgeAttaches.size());
if (ObjectUtil.isNotEmpty(knowledgeAttaches)) {
for (KnowledgeAttach attachItem : knowledgeAttaches) {
@@ -356,30 +353,29 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
*/
@Scheduled(fixedDelay = 15000)
public void dealKnowledgeAttachPicAnys() throws Exception {
//获取未处理的图片记录
// 获取未处理的图片记录
List<KnowledgeAttachPic> knowledgeAttachPics = picMapper.selectList(
new LambdaQueryWrapper<KnowledgeAttachPic>()
.eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10)
.last("LIMIT 20")
);
.last("LIMIT 20"));
if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) {
for (KnowledgeAttachPic picItem : knowledgeAttachPics) {
dealFileService.dealPicAnysStatus(picItem);
}
}
}
/**
* 第三步 定时 处理 附件上传后上传向量数据库
*/
@Scheduled(fixedDelay = 30000) // runs again 30 seconds after the previous execution finishes
public void dealKnowledgeAttachVector() throws Exception {
//处理 需要上传向量数据库的记录
// 处理 需要上传向量数据库的记录
List<KnowledgeAttach> knowledgeAttaches = attachMapper.selectList(
new LambdaQueryWrapper<KnowledgeAttach>()
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
);
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10));
log.info("===============上传向量数据库 size = {}", knowledgeAttaches.size());
if (ObjectUtil.isNotEmpty(knowledgeAttaches)) {
for (KnowledgeAttach attachItem : knowledgeAttaches) {
@@ -387,23 +383,24 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
}
}
}
/**
* 第四步 定时 处理 失败数据
*/
@Scheduled(fixedDelay = 30 * 60 * 1000)
public void dealKnowledge40Status() throws Exception {
//拆解PDF失败 重新设置状态
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
.set(KnowledgeAttach::getPicStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_40));
//将图片分析失败的数据 重新设置状态
picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>()
.set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_40));
//上传向量库失败 重新设置状态
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
.set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_40));
// 拆解PDF失败 重新设置状态
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
.set(KnowledgeAttach::getPicStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_40));
// 将图片分析失败的数据 重新设置状态
picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>()
.set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_40));
// 上传向量库失败 重新设置状态
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
.set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_40));
}
}

View File

@@ -0,0 +1,2 @@
ALTER TABLE `knowledge_attach`
MODIFY COLUMN `oss_id` bigint(20) NULL DEFAULT NULL COMMENT '对象存储ID' AFTER `doc_type`;