feat: 集成阿里百炼API实现图片内容识别功能

添加DashscopeService接口及实现,用于调用阿里百炼API进行图片内容识别
修改PdfImageExtractService增加基于百炼API的图片处理逻辑
新增OSS服务方法支持临时文件处理和删除
更新配置文件添加百炼模型相关配置
This commit is contained in:
zhouweiyi
2025-06-04 17:55:47 +08:00
parent 53e3180658
commit 1d51a103d0
13 changed files with 472 additions and 83 deletions

View File

@@ -52,7 +52,7 @@ public class KnowledgeController extends BaseController {
private final IKnowledgeFragmentService fragmentService;
// private final PdfImageExtractService pdfImageExtractService;
private final PdfImageExtractService pdfImageExtractService;
/**
* 根据用户信息查询本地知识库
@@ -170,11 +170,11 @@ public class KnowledgeController extends BaseController {
* @param file PDF文件
* @return 文件名称和图片内容
*/
// @PostMapping("/extract-images")
// @Operation(summary = "提取PDF中的图片并调用大模型,识别图片内容并返回", description = "提取PDF中的图片并调用gpt-4o-mini,识别图片内容并返回")
// public R<List<PdfFileContentResult>> extractImages(
// @RequestPart("file") MultipartFile file
// ) throws IOException {
// return R.ok(pdfImageExtractService.extractImages(file));
// }
@PostMapping("/extract-images")
@Operation(summary = "提取PDF中的图片并调用大模型,识别图片内容并返回", description = "提取PDF中的图片并调用gpt-4o-mini,识别图片内容并返回")
public R<List<PdfFileContentResult>> extractImages() throws IOException {
    // NOTE(review): this endpoint takes no request input; the image URL below is a
    // fixed sample address, so every call analyses the same picture. This looks like
    // debugging scaffolding - confirm before exposing the endpoint.
    // NOTE(review): the @Operation description still names gpt-4o-mini although the
    // call goes through dealFileContent4Dashscope - verify which wording is intended.
    String imageUrl = "https://hnzuoran02-1327573163.cos.ap-nanjing.myqcloud.com/crmebimage/public/content/2025/06/04/e115264eb22f423ea0b211709361c29f071avy39ez.jpg";
    // Hand the URL to the image-extract service and wrap its result in the
    // standard success response.
    return R.ok(pdfImageExtractService.dealFileContent4Dashscope(imageUrl));
}
}

View File

@@ -26,6 +26,7 @@ import org.ruoyi.mapper.KnowledgeAttachPicMapper;
import org.ruoyi.mapper.KnowledgeFragmentMapper;
import org.ruoyi.mapper.KnowledgeInfoMapper;
import org.ruoyi.service.IChatModelService;
import org.ruoyi.service.PdfImageExtractService;
import org.ruoyi.service.VectorStoreService;
import org.ruoyi.service.impl.PdfImageExtractServiceImpl;
import org.ruoyi.system.domain.vo.SysOssVo;
@@ -64,18 +65,10 @@ public class DealFileService {
private final ISysOssService ossService;
// private final PdfImageExtractService pdfImageExtractService;
private final PdfImageExtractService pdfImageExtractService;
private final KnowledgeAttachPicMapper picMapper;
@Value("${pdf.extract.service.url}")
private String serviceUrl;
@Value("${pdf.extract.ai-api.url}")
private String aiApiUrl;
@Value("${pdf.extract.ai-api.key}")
private String aiApiKey;
@Async
public void dealVectorStatus(KnowledgeAttach attachItem) throws Exception {
try {
@@ -169,8 +162,6 @@ public class DealFileService {
//获取oss文件
MultipartFile multipartFile = ossService.downloadByFile(attachItem.getOssId());
//拆解出图片ZIP
PdfImageExtractServiceImpl pdfImageExtractService = new PdfImageExtractServiceImpl(serviceUrl,
aiApiUrl, aiApiKey);
byte[] pngs = pdfImageExtractService.extractImages(multipartFile, "png", true);
//解压zip得到图片文件
MultipartFile[] multipartFiles = ZipUtils.unzipToMultipartFiles(pngs);
@@ -236,6 +227,7 @@ public class DealFileService {
@Async
public void dealPicAnysStatus(KnowledgeAttachPic picItem) throws Exception {
String filePath = null;
try {
//锁定数据 更改 getPicAnysStatus 到进行中
if (picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>()
@@ -247,13 +239,10 @@ public class DealFileService {
}
SysOssVo ossVo = ossService.getById(picItem.getOssId());
if (ObjectUtil.isNotEmpty(ossVo)) {
String fileStr = ossService.downloadByByte(picItem.getOssId());
filePath = ossService.downloadToTempPath(picItem.getOssId());
//调用第三方 分析图片内容
PdfImageExtractServiceImpl pdfImageExtractService = new PdfImageExtractServiceImpl(
serviceUrl,
aiApiUrl, aiApiKey);
List<PdfFileContentResult> pdfFileContentResults = pdfImageExtractService.dealFileContent(
new String[]{fileStr});
List<PdfFileContentResult> pdfFileContentResults = pdfImageExtractService.dealFileContent4DashscopeBase64(
filePath);
if (ObjectUtil.isNotEmpty(pdfFileContentResults)) {
for (PdfFileContentResult resultItem : pdfFileContentResults) {
//图片解析内容回写到pic表
@@ -302,6 +291,11 @@ public class DealFileService {
.eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_20)
.eq(KnowledgeAttachPic::getId, picItem.getId()));
throw new RuntimeException(e);
} finally {
//无论成功还是失败,都要删除临时文件
if (ObjectUtil.isNotEmpty(filePath)) {
ossService.deleteFile(filePath);
}
}
}
@@ -349,35 +343,32 @@ public class DealFileService {
return null;
}
public static String parseContent(String jsonString) {
public static String parseContent(String content) {
try {
// 创建ObjectMapper实例
// 首先尝试作为JSON解析
ObjectMapper objectMapper = new ObjectMapper();
JsonNode rootNode = objectMapper.readTree(content);
// 解析JSON字符串
JsonNode rootNode = objectMapper.readTree(jsonString);
// 获取choices数组的第一个元素
// 如果是JSON格式,按原有逻辑处理
JsonNode choicesNode = rootNode.get("choices");
if (choicesNode != null && choicesNode.isArray() && choicesNode.size() > 0) {
// 获取第一个choice
JsonNode firstChoice = choicesNode.get(0);
// 获取message节点
JsonNode messageNode = firstChoice.get("message");
if (messageNode != null) {
// 获取content字段的值
JsonNode contentNode = messageNode.get("content");
if (contentNode != null) {
return contentNode.asText();
}
}
return "无法找到content内容";
}
return "无法找到content内容";
// 如果不是预期的JSON格式直接返回原始内容
return content;
} catch (Exception e) {
e.printStackTrace();
return "解析JSON时发生错误: " + e.getMessage();
// 如果解析JSON失败说明是普通文本直接返回
return content;
}
}

View File

@@ -57,7 +57,6 @@ import org.ruoyi.system.service.ISysOssService;
import java.io.IOException;
import java.util.*;
/**
* 知识库Service业务层处理
*
@@ -83,7 +82,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
private final ISysOssService ossService;
// private final PdfImageExtractService pdfImageExtractService;
// private final PdfImageExtractService pdfImageExtractService;
private final KnowledgeAttachPicMapper picMapper;
@@ -170,7 +169,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
* 保存前的数据校验
*/
private void validEntityBeforeSave(KnowledgeInfo entity) {
// Currently a no-op placeholder: nothing is checked before the entity is saved.
// TODO add data validation here, e.g. unique-constraint checks
}
/**
@@ -179,7 +178,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
@Override
public Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid) {
    // isValid is a boxed Boolean: compare null-safely so that a null flag is treated
    // as "no validation" instead of throwing a NullPointerException on unboxing.
    if (Boolean.TRUE.equals(isValid)) {
        // TODO add business-level checks that decide whether the delete is allowed
    }
    // Report success only when the batch delete affected at least one row.
    return baseMapper.deleteBatchIds(ids) > 0;
}
@@ -223,10 +222,10 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
Collection<Long> ossIds = knowledgeAttachVos.stream()
.map(KnowledgeAttachVo::getOssId)
.collect(Collectors.toList());
//删除oss
// 删除oss
ossService.deleteWithValidByIds(ossIds, false);
//删除图片oss
// 删除图片oss
List<KnowledgeAttachPic> knowledgeAttachPics = picMapper.selectList(
new LambdaQueryWrapper<KnowledgeAttachPic>()
.in(KnowledgeAttachPic::getKid,
@@ -234,8 +233,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
.collect(Collectors.toList()))
.in(KnowledgeAttachPic::getAid,
knowledgeAttachVos.stream().map(KnowledgeAttachVo::getId)
.collect(Collectors.toList()))
);
.collect(Collectors.toList())));
if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) {
Collection<Long> tossIds = knowledgeAttachPics.stream()
.map(KnowledgeAttachPic::getOssId)
@@ -302,7 +300,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
knowledgeAttach.setCreateTime(new Date());
if (ObjectUtil.isNotEmpty(uploadDto) && ObjectUtil.isNotEmpty(uploadDto.getOssId())) {
knowledgeAttach.setOssId(uploadDto.getOssId());
//只有pdf文件 才需要拆解图片和分析图片内容
// 只有pdf文件 才需要拆解图片和分析图片内容
if (FileType.PDF.equals(knowledgeAttach.getDocType())) {
knowledgeAttach.setPicStatus(DealStatus.STATUS_10);
knowledgeAttach.setPicAnysStatus(DealStatus.STATUS_10);
@@ -310,7 +308,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
knowledgeAttach.setPicStatus(DealStatus.STATUS_30);
knowledgeAttach.setPicAnysStatus(DealStatus.STATUS_30);
}
//所有文件上传后,都需要同步到向量数据库
// 所有文件上传后,都需要同步到向量数据库
knowledgeAttach.setVectorStatus(DealStatus.STATUS_10);
}
attachMapper.insert(knowledgeAttach);
@@ -334,15 +332,14 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
/**
* 第一步 定时 拆解PDF文件中的图片
*/
//@Scheduled(fixedDelay = 15000) // 每3秒执行一次
@Scheduled(fixedDelay = 15000) // 每15秒执行一次
public void dealKnowledgeAttachPic() throws Exception {
//处理 拆解PDF文件中的图片的记录
// 处理 拆解PDF文件中的图片的记录
List<KnowledgeAttach> knowledgeAttaches = attachMapper.selectList(
new LambdaQueryWrapper<KnowledgeAttach>()
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
);
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10));
log.info("===============拆解PDF文件中的图片 size = {}", knowledgeAttaches.size());
if (ObjectUtil.isNotEmpty(knowledgeAttaches)) {
for (KnowledgeAttach attachItem : knowledgeAttaches) {
@@ -356,30 +353,29 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
*/
@Scheduled(fixedDelay = 15000)
public void dealKnowledgeAttachPicAnys() throws Exception {
//获取未处理的图片记录
// 获取未处理的图片记录
List<KnowledgeAttachPic> knowledgeAttachPics = picMapper.selectList(
new LambdaQueryWrapper<KnowledgeAttachPic>()
.eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10)
.last("LIMIT 20")
);
.last("LIMIT 20"));
if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) {
for (KnowledgeAttachPic picItem : knowledgeAttachPics) {
dealFileService.dealPicAnysStatus(picItem);
}
}
}
/**
* 第三步 定时 处理 附件上传后上传向量数据库
*/
@Scheduled(fixedDelay = 30000) // 每30秒执行一次
public void dealKnowledgeAttachVector() throws Exception {
//处理 需要上传向量数据库的记录
// 处理 需要上传向量数据库的记录
List<KnowledgeAttach> knowledgeAttaches = attachMapper.selectList(
new LambdaQueryWrapper<KnowledgeAttach>()
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
);
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10));
log.info("===============上传向量数据库 size = {}", knowledgeAttaches.size());
if (ObjectUtil.isNotEmpty(knowledgeAttaches)) {
for (KnowledgeAttach attachItem : knowledgeAttaches) {
@@ -387,23 +383,24 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
}
}
}
/**
* 第四步 定时 处理 失败数据
*/
@Scheduled(fixedDelay = 30 * 60 * 1000)
public void dealKnowledge40Status() throws Exception {
    // Each reset is issued exactly once. The rendered source carried every statement
    // twice (an old-format and a new-format copy); the second copy only repeated the
    // same UPDATE, so it is dropped here.

    // PDF image extraction failed: move the attachment from STATUS_40 back to STATUS_10.
    attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
        .set(KnowledgeAttach::getPicStatus, DealStatus.STATUS_10)
        .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_40));

    // Image analysis failed: move the picture record from STATUS_40 back to STATUS_10.
    picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>()
        .set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10)
        .eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_40));

    // Vector-store upload failed: move the attachment from STATUS_40 back to STATUS_10.
    attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
        .set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
        .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_40));
}
}