feat: 集成阿里百炼API实现图片内容识别功能

添加DashscopeService接口及实现,用于调用阿里百炼API进行图片内容识别
修改PdfImageExtractService增加基于百炼API的图片处理逻辑
新增OSS服务方法支持临时文件处理和删除
更新配置文件添加百炼模型相关配置
This commit is contained in:
zhouweiyi
2025-06-04 17:55:47 +08:00
parent 53e3180658
commit 1d51a103d0
13 changed files with 472 additions and 83 deletions

View File

@@ -0,0 +1,23 @@
package org.ruoyi.service;
import java.io.IOException;
/**
* Contract for calling the Alibaba Bailian (DashScope) API.
*
* @Description: Alibaba Bailian API
* @Date: 2025/6/4 2:24 PM
*/
public interface DashscopeService {
/**
* Visual reasoning (QVQ) on an image referenced by address.
*
* @param imageUrl an accessible address of the image
* @return the text produced by the model for the image
* @throws IOException declared so implementations can report a failed API call
*/
String qvq(String imageUrl) throws IOException;
/**
* Visual reasoning (QVQ) using local file input (Base64 encoding or local path).
*
* @param localPath absolute path of the image file
* @return the text produced by the model for the image
* @throws IOException declared so implementations can report a failed API call
*/
String qvq4LocalPath(String localPath) throws IOException;
}

View File

@@ -30,7 +30,23 @@ public interface PdfImageExtractService {
* @throws IOException 如果API调用过程中发生错误
*/
List<PdfFileContentResult> dealFileContent(String[] unzip) throws IOException;
/**
* Processes file content through the Bailian (DashScope) API.
*
* @param imageUrl address of the image to process
* @return list of file content results
* @throws IOException if an error occurs during the API call
*/
List<PdfFileContentResult> dealFileContent4Dashscope(String imageUrl) throws IOException;
/**
* Processes file content through the Bailian (DashScope) API.
*
* Visual reasoning (QVQ) using local file input (Base64 encoding or local path).
* @param localPath absolute path of the image file
* @return list of file content results
* @throws IOException if an error occurs during the API call
*/
List<PdfFileContentResult> dealFileContent4DashscopeBase64(String localPath)throws IOException;
/**
* 提取PDF中的图片并调用gpt-4o-mini,识别图片内容并返回
* @param file

View File

@@ -0,0 +1,149 @@
package org.ruoyi.service.impl;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversation;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationParam;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationResult;
import com.alibaba.dashscope.common.MultiModalMessage;
import com.alibaba.dashscope.common.Role;
import io.reactivex.Flowable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import lombok.extern.slf4j.Slf4j;
import org.ruoyi.domain.PdfFileContentResult;
import org.ruoyi.service.DashscopeService;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
/**
 * Alibaba Bailian (DashScope) API client used for image content recognition.
 *
 * <p>All streamed output of a call is collected in buffers local to that call, so
 * concurrent requests served by this bean cannot overwrite each other's result.
 *
 * @Description: Alibaba Bailian API
 * @Date: 2025/6/4 2:28 PM
 */
@Service
@Slf4j
public class DashscopeServiceImpl implements DashscopeService {

    /** Prompt sent together with every image. */
    private static final String IMAGE_PROMPT = "这张图片有什么";

    /** Model name, read from the {@code dashscope.model} property. */
    @Value("${dashscope.model}")
    private String serviceModel;

    /** API key, read from the {@code dashscope.key} property. */
    @Value("${dashscope.key}")
    private String serviceKey;

    /**
     * Visual reasoning (QVQ) on an image referenced by address.
     *
     * @param imageUrl an accessible address of the image
     * @return the model's reply with line breaks and spaces removed
     * @throws IOException if the API call fails for any reason
     */
    @Override
    public String qvq(String imageUrl) throws IOException {
        return describeImage(Arrays.asList(
            Collections.singletonMap("text", IMAGE_PROMPT),
            Collections.singletonMap("image", imageUrl)));
    }

    /**
     * Visual reasoning (QVQ) on an image stored on the local file system.
     *
     * @param localPath absolute path of the image file
     * @return the model's reply with line breaks and spaces removed
     * @throws IOException if the API call fails for any reason
     */
    @Override
    public String qvq4LocalPath(String localPath) throws IOException {
        String filePath = "file://" + localPath;
        log.info("filePath: {}", filePath);
        // Plain mutable maps are used on purpose.
        // NOTE(review): the SDK presumably replaces "file://" entries in place after
        // uploading the file, so immutable maps may fail here - confirm against the SDK.
        Map<String, Object> imagePart = new HashMap<>();
        imagePart.put("image", filePath);
        Map<String, Object> textPart = new HashMap<>();
        textPart.put("text", IMAGE_PROMPT);
        return describeImage(Arrays.asList(imagePart, textPart));
    }

    /**
     * Sends one user message with the given content parts to the configured model,
     * consumes the streamed response and returns the concatenated answer text.
     *
     * @param content content parts of the user message (image and text entries)
     * @return the answer text with line feeds, carriage returns and spaces removed
     * @throws IOException wrapping any exception raised while building or running the call
     */
    private String describeImage(List<Map<String, Object>> content) throws IOException {
        try {
            MultiModalMessage userMessage = MultiModalMessage.builder()
                .role(Role.USER.getValue())
                .content(content)
                .build();
            MultiModalConversationParam param = MultiModalConversationParam.builder()
                .apiKey(serviceKey) // API key from the configuration file
                .model(serviceModel)
                .message(userMessage)
                .build();
            Flowable<MultiModalConversationResult> stream =
                new MultiModalConversation().streamCall(param);

            // Per-call buffers: shared static buffers would be corrupted by parallel requests.
            StringBuilder reasoning = new StringBuilder();
            StringBuilder answer = new StringBuilder();
            stream.blockingForEach(chunk -> collectChunk(chunk, reasoning, answer));

            log.debug("Dashscope reasoning content: {}", reasoning);
            log.debug("Dashscope final content: {}", answer);
            // Remove line feeds, carriage returns and spaces so the reply is one compact string.
            return answer.toString().replaceAll("[\\n\\r ]", "");
        } catch (Exception e) {
            log.error("调用百炼API失败: {}", e.getMessage(), e);
            throw new IOException("百炼API调用失败: " + e.getMessage(), e);
        }
    }

    /**
     * Appends the reasoning text and the answer text carried by one streamed chunk
     * to the given buffers. Chunks without output, choices or message are ignored.
     *
     * @param chunk     one streamed result
     * @param reasoning buffer receiving the reasoning content
     * @param answer    buffer receiving the final answer text
     */
    private static void collectChunk(MultiModalConversationResult chunk,
                                     StringBuilder reasoning,
                                     StringBuilder answer) {
        if (Objects.isNull(chunk)
                || Objects.isNull(chunk.getOutput())
                || Objects.isNull(chunk.getOutput().getChoices())
                || chunk.getOutput().getChoices().isEmpty()) {
            return;
        }
        MultiModalMessage message = chunk.getOutput().getChoices().get(0).getMessage();
        if (Objects.isNull(message)) {
            return;
        }

        String reasoningPart = message.getReasoningContent();
        if (Objects.nonNull(reasoningPart) && !reasoningPart.isEmpty()) {
            reasoning.append(reasoningPart);
        }

        List<Map<String, Object>> parts = message.getContent();
        if (Objects.nonNull(parts) && !parts.isEmpty() && Objects.nonNull(parts.get(0))) {
            Object text = parts.get(0).get("text");
            // A missing "text" entry must not end up as the literal "null" in the answer.
            if (Objects.nonNull(text)) {
                answer.append(text);
            }
        }
    }
}

View File

@@ -4,7 +4,6 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import okhttp3.MediaType;
@@ -14,10 +13,11 @@ import okhttp3.OkHttpClient.Builder;
import okhttp3.Request;
import okhttp3.RequestBody;
import okhttp3.Response;
import org.ruoyi.common.core.domain.R;
import org.ruoyi.domain.PdfFileContentResult;
import org.ruoyi.service.DashscopeService;
import org.ruoyi.service.PdfImageExtractService;
import org.ruoyi.utils.ZipUtils;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
@@ -25,20 +25,21 @@ import org.springframework.web.multipart.MultipartFile;
/**
* PDF图片提取服务实现类
*/
//@Service
@Service
@Slf4j
@Data
@AllArgsConstructor
//public class PdfImageExtractServiceImpl implements PdfImageExtractService {
public class PdfImageExtractServiceImpl {
public class PdfImageExtractServiceImpl implements PdfImageExtractService {
// @Value("${pdf.extract.service.url}")
@Value("${pdf.extract.service.url}")
private String serviceUrl;
// @Value("${pdf.extract.ai-api.url}")
@Value("${pdf.extract.ai-api.url}")
private String aiApiUrl;
// @Value("${pdf.extract.ai-api.key}")
@Value("${pdf.extract.ai-api.key}")
private String aiApiKey;
@Autowired
private DashscopeService dashscopeService;
private final OkHttpClient client = new Builder()
.connectTimeout(100, TimeUnit.SECONDS)
.readTimeout(150, TimeUnit.SECONDS)
@@ -48,7 +49,7 @@ public class PdfImageExtractServiceImpl {
private static final MediaType JSON = MediaType.parse("application/json; charset=utf-8");
// @Override
// @Override
public byte[] extractImages(MultipartFile pdfFile, String imageFormat, boolean allowDuplicates)
throws IOException {
// 构建multipart请求
@@ -135,7 +136,43 @@ public class PdfImageExtractServiceImpl {
return results;
}
// @Override
/**
 * Recognizes the content of an image through the Bailian (DashScope) service.
 *
 * <p>The recognized text is wrapped in a single result whose name is
 * {@code "image_"} followed by the current time in milliseconds.
 *
 * @param imageUrl address of the image to recognize
 * @return a list holding the single result for this image
 * @throws IOException if the underlying API call fails
 */
@Override
public List<PdfFileContentResult> dealFileContent4Dashscope(String imageUrl) throws IOException {
    final String recognizedText = dashscopeService.qvq(imageUrl);
    // The name is generated after the call returns, from the current timestamp.
    final PdfFileContentResult entry =
        new PdfFileContentResult("image_" + System.currentTimeMillis(), recognizedText);
    final List<PdfFileContentResult> wrapped = new ArrayList<>();
    wrapped.add(entry);
    return wrapped;
}
/**
 * Recognizes the content of a local image file through the Bailian (DashScope) service.
 *
 * <p>Visual reasoning (QVQ) using local file input. The recognized text is wrapped in a
 * single result whose name is {@code "image_"} followed by the current time in milliseconds.
 *
 * @param localPath absolute path of the image file
 * @return a list holding the single result for this image
 * @throws IOException if the underlying API call fails
 */
@Override
public List<PdfFileContentResult> dealFileContent4DashscopeBase64(String localPath) throws IOException {
    final String recognizedText = dashscopeService.qvq4LocalPath(localPath);
    // The name is generated after the call returns, from the current timestamp.
    final String generatedName = "image_" + System.currentTimeMillis();
    final List<PdfFileContentResult> wrapped = new ArrayList<>();
    wrapped.add(new PdfFileContentResult(generatedName, recognizedText));
    return wrapped;
}
// @Override
public List<PdfFileContentResult> extractImages(MultipartFile file) throws IOException {
String format = "png";
boolean allowDuplicates = true;