mirror of
https://gitcode.com/ageerle/ruoyi-ai.git
synced 2026-04-08 09:17:33 +00:00
feat: 集成阿里百炼API实现图片内容识别功能
添加 DashscopeService 接口及实现，用于调用阿里百炼 API 进行图片内容识别；修改 PdfImageExtractService，增加基于百炼 API 的图片处理逻辑；新增 OSS 服务方法，支持临时文件的处理和删除；更新配置文件，添加百炼模型相关配置。
This commit is contained in:
@@ -52,7 +52,7 @@ public class KnowledgeController extends BaseController {
|
||||
|
||||
private final IKnowledgeFragmentService fragmentService;
|
||||
|
||||
// private final PdfImageExtractService pdfImageExtractService;
|
||||
private final PdfImageExtractService pdfImageExtractService;
|
||||
|
||||
/**
|
||||
* 根据用户信息查询本地知识库
|
||||
@@ -170,11 +170,11 @@ public class KnowledgeController extends BaseController {
|
||||
* @param file PDF文件
|
||||
* @return 文件名称和图片内容
|
||||
*/
|
||||
// @PostMapping("/extract-images")
|
||||
// @Operation(summary = "提取PDF中的图片并调用大模型,识别图片内容并返回", description = "提取PDF中的图片并调用gpt-4o-mini,识别图片内容并返回")
|
||||
// public R<List<PdfFileContentResult>> extractImages(
|
||||
// @RequestPart("file") MultipartFile file
|
||||
// ) throws IOException {
|
||||
// return R.ok(pdfImageExtractService.extractImages(file));
|
||||
// }
|
||||
// NOTE(review): this endpoint declares no request parameters. It always passes the
// hard-coded image URL below to pdfImageExtractService.dealFileContent4Dashscope and
// wraps the returned list in R.ok. The @Operation summary/description still talk about
// extracting images from a PDF and calling gpt-4o-mini — TODO confirm whether the fixed
// URL is temporary test scaffolding and bring the description in line with the call.
@PostMapping("/extract-images")
|
||||
@Operation(summary = "提取PDF中的图片并调用大模型,识别图片内容并返回", description = "提取PDF中的图片并调用gpt-4o-mini,识别图片内容并返回")
|
||||
public R<List<PdfFileContentResult>> extractImages(
|
||||
) throws IOException {
|
||||
return R.ok(pdfImageExtractService
|
||||
.dealFileContent4Dashscope("https://hnzuoran02-1327573163.cos.ap-nanjing.myqcloud.com/crmebimage/public/content/2025/06/04/e115264eb22f423ea0b211709361c29f071avy39ez.jpg"));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -26,6 +26,7 @@ import org.ruoyi.mapper.KnowledgeAttachPicMapper;
|
||||
import org.ruoyi.mapper.KnowledgeFragmentMapper;
|
||||
import org.ruoyi.mapper.KnowledgeInfoMapper;
|
||||
import org.ruoyi.service.IChatModelService;
|
||||
import org.ruoyi.service.PdfImageExtractService;
|
||||
import org.ruoyi.service.VectorStoreService;
|
||||
import org.ruoyi.service.impl.PdfImageExtractServiceImpl;
|
||||
import org.ruoyi.system.domain.vo.SysOssVo;
|
||||
@@ -64,18 +65,10 @@ public class DealFileService {
|
||||
|
||||
private final ISysOssService ossService;
|
||||
|
||||
// private final PdfImageExtractService pdfImageExtractService;
|
||||
private final PdfImageExtractService pdfImageExtractService;
|
||||
|
||||
private final KnowledgeAttachPicMapper picMapper;
|
||||
|
||||
@Value("${pdf.extract.service.url}")
|
||||
private String serviceUrl;
|
||||
@Value("${pdf.extract.ai-api.url}")
|
||||
private String aiApiUrl;
|
||||
@Value("${pdf.extract.ai-api.key}")
|
||||
private String aiApiKey;
|
||||
|
||||
|
||||
@Async
|
||||
public void dealVectorStatus(KnowledgeAttach attachItem) throws Exception {
|
||||
try {
|
||||
@@ -169,8 +162,6 @@ public class DealFileService {
|
||||
//获取oss文件
|
||||
MultipartFile multipartFile = ossService.downloadByFile(attachItem.getOssId());
|
||||
//拆解出图片ZIP
|
||||
PdfImageExtractServiceImpl pdfImageExtractService = new PdfImageExtractServiceImpl(serviceUrl,
|
||||
aiApiUrl, aiApiKey);
|
||||
byte[] pngs = pdfImageExtractService.extractImages(multipartFile, "png", true);
|
||||
//解压zip,得到图片文件
|
||||
MultipartFile[] multipartFiles = ZipUtils.unzipToMultipartFiles(pngs);
|
||||
@@ -236,6 +227,7 @@ public class DealFileService {
|
||||
|
||||
@Async
|
||||
public void dealPicAnysStatus(KnowledgeAttachPic picItem) throws Exception {
|
||||
String filePath = null;
|
||||
try {
|
||||
//锁定数据 更改 getPicAnysStatus 到进行中
|
||||
if (picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>()
|
||||
@@ -247,13 +239,10 @@ public class DealFileService {
|
||||
}
|
||||
SysOssVo ossVo = ossService.getById(picItem.getOssId());
|
||||
if (ObjectUtil.isNotEmpty(ossVo)) {
|
||||
String fileStr = ossService.downloadByByte(picItem.getOssId());
|
||||
filePath = ossService.downloadToTempPath(picItem.getOssId());
|
||||
//调用第三方 分析图片内容
|
||||
PdfImageExtractServiceImpl pdfImageExtractService = new PdfImageExtractServiceImpl(
|
||||
serviceUrl,
|
||||
aiApiUrl, aiApiKey);
|
||||
List<PdfFileContentResult> pdfFileContentResults = pdfImageExtractService.dealFileContent(
|
||||
new String[]{fileStr});
|
||||
List<PdfFileContentResult> pdfFileContentResults = pdfImageExtractService.dealFileContent4DashscopeBase64(
|
||||
filePath);
|
||||
if (ObjectUtil.isNotEmpty(pdfFileContentResults)) {
|
||||
for (PdfFileContentResult resultItem : pdfFileContentResults) {
|
||||
//图片解析内容回写到pic表
|
||||
@@ -302,6 +291,11 @@ public class DealFileService {
|
||||
.eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_20)
|
||||
.eq(KnowledgeAttachPic::getId, picItem.getId()));
|
||||
throw new RuntimeException(e);
|
||||
} finally {
|
||||
//无论成功还是失败,都要删除临时文件
|
||||
if (ObjectUtil.isNotEmpty(filePath)) {
|
||||
ossService.deleteFile(filePath);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -349,35 +343,32 @@ public class DealFileService {
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
 * Extracts the text found at choices[0].message.content of a JSON string.
 *
 * NOTE(review): this listing is a rendered diff, so statements from both the earlier
 * revision (parameter jsonString) and the later revision (parameter content) appear
 * below without +/- markers. In the later revision, input that cannot be parsed as JSON,
 * or that has no non-empty "choices" array, appears to be returned unchanged instead of
 * an error message — confirm against the committed file.
 *
 * @param content text to inspect; may be JSON or plain text
 * @return the extracted content text, the fixed "content not found" message, or (in the
 *         later revision) the input itself
 */
public static String parseContent(String jsonString) {
|
||||
public static String parseContent(String content) {
|
||||
try {
|
||||
// Create an ObjectMapper instance
|
||||
// First try to parse the input as JSON
|
||||
ObjectMapper objectMapper = new ObjectMapper();
|
||||
JsonNode rootNode = objectMapper.readTree(content);
|
||||
|
||||
// Parse the JSON string
|
||||
JsonNode rootNode = objectMapper.readTree(jsonString);
|
||||
|
||||
// Get the first element of the choices array
|
||||
// If the input is JSON, handle it with the existing logic
|
||||
JsonNode choicesNode = rootNode.get("choices");
|
||||
if (choicesNode != null && choicesNode.isArray() && choicesNode.size() > 0) {
|
||||
// Take the first choice
|
||||
JsonNode firstChoice = choicesNode.get(0);
|
||||
|
||||
// Get the message node
|
||||
JsonNode messageNode = firstChoice.get("message");
|
||||
if (messageNode != null) {
|
||||
// Get the value of the content field
|
||||
JsonNode contentNode = messageNode.get("content");
|
||||
if (contentNode != null) {
|
||||
return contentNode.asText();
|
||||
}
|
||||
}
|
||||
return "无法找到content内容";
|
||||
}
|
||||
|
||||
return "无法找到content内容";
|
||||
|
||||
// Not the expected JSON shape: return the original content unchanged
|
||||
return content;
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
return "解析JSON时发生错误: " + e.getMessage();
|
||||
// JSON parsing failed, so treat the input as plain text and return it as-is
|
||||
return content;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -57,7 +57,6 @@ import org.ruoyi.system.service.ISysOssService;
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
* 知识库Service业务层处理
|
||||
*
|
||||
@@ -83,7 +82,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
|
||||
private final ISysOssService ossService;
|
||||
|
||||
// private final PdfImageExtractService pdfImageExtractService;
|
||||
// private final PdfImageExtractService pdfImageExtractService;
|
||||
|
||||
private final KnowledgeAttachPicMapper picMapper;
|
||||
|
||||
@@ -170,7 +169,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
* 保存前的数据校验
|
||||
*/
|
||||
private void validEntityBeforeSave(KnowledgeInfo entity) {
|
||||
//TODO 做一些数据校验,如唯一约束
|
||||
// TODO 做一些数据校验,如唯一约束
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -179,7 +178,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
@Override
|
||||
public Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid) {
|
||||
if (isValid) {
|
||||
//TODO 做一些业务上的校验,判断是否需要校验
|
||||
// TODO 做一些业务上的校验,判断是否需要校验
|
||||
}
|
||||
return baseMapper.deleteBatchIds(ids) > 0;
|
||||
}
|
||||
@@ -223,10 +222,10 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
Collection<Long> ossIds = knowledgeAttachVos.stream()
|
||||
.map(KnowledgeAttachVo::getOssId)
|
||||
.collect(Collectors.toList());
|
||||
//删除oss
|
||||
// 删除oss
|
||||
ossService.deleteWithValidByIds(ossIds, false);
|
||||
|
||||
//删除图片oss
|
||||
// 删除图片oss
|
||||
List<KnowledgeAttachPic> knowledgeAttachPics = picMapper.selectList(
|
||||
new LambdaQueryWrapper<KnowledgeAttachPic>()
|
||||
.in(KnowledgeAttachPic::getKid,
|
||||
@@ -234,8 +233,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
.collect(Collectors.toList()))
|
||||
.in(KnowledgeAttachPic::getAid,
|
||||
knowledgeAttachVos.stream().map(KnowledgeAttachVo::getId)
|
||||
.collect(Collectors.toList()))
|
||||
);
|
||||
.collect(Collectors.toList())));
|
||||
if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) {
|
||||
Collection<Long> tossIds = knowledgeAttachPics.stream()
|
||||
.map(KnowledgeAttachPic::getOssId)
|
||||
@@ -302,7 +300,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
knowledgeAttach.setCreateTime(new Date());
|
||||
if (ObjectUtil.isNotEmpty(uploadDto) && ObjectUtil.isNotEmpty(uploadDto.getOssId())) {
|
||||
knowledgeAttach.setOssId(uploadDto.getOssId());
|
||||
//只有pdf文件 才需要拆解图片和分析图片内容
|
||||
// 只有pdf文件 才需要拆解图片和分析图片内容
|
||||
if (FileType.PDF.equals(knowledgeAttach.getDocType())) {
|
||||
knowledgeAttach.setPicStatus(DealStatus.STATUS_10);
|
||||
knowledgeAttach.setPicAnysStatus(DealStatus.STATUS_10);
|
||||
@@ -310,7 +308,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
knowledgeAttach.setPicStatus(DealStatus.STATUS_30);
|
||||
knowledgeAttach.setPicAnysStatus(DealStatus.STATUS_30);
|
||||
}
|
||||
//所有文件上传后,都需要同步到向量数据库
|
||||
// 所有文件上传后,都需要同步到向量数据库
|
||||
knowledgeAttach.setVectorStatus(DealStatus.STATUS_10);
|
||||
}
|
||||
attachMapper.insert(knowledgeAttach);
|
||||
@@ -334,15 +332,14 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
/**
|
||||
* 第一步 定时 拆解PDF文件中的图片
|
||||
*/
|
||||
//@Scheduled(fixedDelay = 15000) // 每3秒执行一次
|
||||
@Scheduled(fixedDelay = 15000) // runs again 15 seconds after the previous run finishes
|
||||
public void dealKnowledgeAttachPic() throws Exception {
|
||||
//处理 拆解PDF文件中的图片的记录
|
||||
// 处理 拆解PDF文件中的图片的记录
|
||||
List<KnowledgeAttach> knowledgeAttaches = attachMapper.selectList(
|
||||
new LambdaQueryWrapper<KnowledgeAttach>()
|
||||
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
|
||||
);
|
||||
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10));
|
||||
log.info("===============拆解PDF文件中的图片 size = {}", knowledgeAttaches.size());
|
||||
if (ObjectUtil.isNotEmpty(knowledgeAttaches)) {
|
||||
for (KnowledgeAttach attachItem : knowledgeAttaches) {
|
||||
@@ -356,30 +353,29 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
*/
|
||||
// Scheduled with a fixed delay of 15000 ms. Selects at most 20 KnowledgeAttachPic rows
// whose picAnysStatus equals DealStatus.STATUS_10 and passes each one to
// dealFileService.dealPicAnysStatus (declared @Async in DealFileService).
// NOTE(review): this listing is a rendered diff; the duplicated comment and the two
// variants of the closing of the selectList(...) call are the old and new formatting.
@Scheduled(fixedDelay = 15000)
|
||||
public void dealKnowledgeAttachPicAnys() throws Exception {
|
||||
//Fetch picture records that have not been processed yet
|
||||
// Fetch picture records that have not been processed yet
|
||||
List<KnowledgeAttachPic> knowledgeAttachPics = picMapper.selectList(
|
||||
new LambdaQueryWrapper<KnowledgeAttachPic>()
|
||||
.eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10)
|
||||
.last("LIMIT 20")
|
||||
);
|
||||
.last("LIMIT 20"));
|
||||
if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) {
|
||||
for (KnowledgeAttachPic picItem : knowledgeAttachPics) {
|
||||
dealFileService.dealPicAnysStatus(picItem);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 第三步 定时 处理 附件上传后上传向量数据库
|
||||
*/
|
||||
@Scheduled(fixedDelay = 30000) // runs again 30 seconds after the previous run finishes
|
||||
public void dealKnowledgeAttachVector() throws Exception {
|
||||
//处理 需要上传向量数据库的记录
|
||||
// 处理 需要上传向量数据库的记录
|
||||
List<KnowledgeAttach> knowledgeAttaches = attachMapper.selectList(
|
||||
new LambdaQueryWrapper<KnowledgeAttach>()
|
||||
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
|
||||
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
|
||||
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
|
||||
);
|
||||
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10));
|
||||
log.info("===============上传向量数据库 size = {}", knowledgeAttaches.size());
|
||||
if (ObjectUtil.isNotEmpty(knowledgeAttaches)) {
|
||||
for (KnowledgeAttach attachItem : knowledgeAttaches) {
|
||||
@@ -387,23 +383,24 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 第四步 定时 处理 失败数据
|
||||
*/
|
||||
// Scheduled with a fixed delay of 30 * 60 * 1000 ms (30 minutes). Resets rows that are in
// DealStatus.STATUS_40 back to DealStatus.STATUS_10 for three columns: picStatus and
// vectorStatus on KnowledgeAttach, and picAnysStatus on KnowledgeAttachPic, so the other
// scheduled steps select them again.
// NOTE(review): this listing is a rendered diff; each update appears twice because the
// old and the re-indented new lines are both shown — confirm the committed file runs
// each update only once.
@Scheduled(fixedDelay = 30 * 60 * 1000)
|
||||
public void dealKnowledge40Status() throws Exception {
|
||||
//PDF image extraction failed: reset the status
|
||||
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
|
||||
.set(KnowledgeAttach::getPicStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_40));
|
||||
//Picture analysis failed: reset the status
|
||||
picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>()
|
||||
.set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_40));
|
||||
//Upload to the vector store failed: reset the status
|
||||
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
|
||||
.set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_40));
|
||||
// PDF image extraction failed: reset the status
|
||||
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
|
||||
.set(KnowledgeAttach::getPicStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_40));
|
||||
// Picture analysis failed: reset the status
|
||||
picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>()
|
||||
.set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_40));
|
||||
// Upload to the vector store failed: reset the status
|
||||
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
|
||||
.set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_40));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user