提取PDF中的图片并调用大模型,识别图片内容并返回

This commit is contained in:
zhouweiyi
2025-05-13 10:55:39 +08:00
parent 3666157d14
commit 32da85daab
7 changed files with 441 additions and 98 deletions

View File

@@ -0,0 +1,41 @@
package org.ruoyi.service;
import java.io.IOException;
import java.util.List;
import org.ruoyi.domain.PdfFileContentResult;
import org.springframework.web.multipart.MultipartFile;
/**
* PDF图片提取服务接口
*/
public interface PdfImageExtractService {
/**
* 从PDF文件中提取图片
*
* @param pdfFile PDF文件
* @param imageFormat 输出图片格式 (png, jpeg, gif)
* @param allowDuplicates 是否允许重复图片
* @return 包含提取图片的ZIP文件的字节数组
* @throws IOException 如果文件处理过程中发生错误
*/
byte[] extractImages(MultipartFile pdfFile, String imageFormat, boolean allowDuplicates)
throws IOException;
/**
* 处理文件内容
*
* @param unzip Base64编码的图片数组
* @return 文件内容结果列表
* @throws IOException 如果API调用过程中发生错误
*/
List<PdfFileContentResult> dealFileContent(String[] unzip) throws IOException;
/**
* 提取PDF中的图片并调用gpt-4o-mini,识别图片内容并返回
* @param file
* @return
* @throws IOException
*/
List<PdfFileContentResult> extractImages(MultipartFile file) throws IOException;
}

View File

@@ -0,0 +1,144 @@
package org.ruoyi.service.impl;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import lombok.extern.slf4j.Slf4j;
import okhttp3.MediaType;
import okhttp3.MultipartBody;
import okhttp3.OkHttpClient;
import okhttp3.OkHttpClient.Builder;
import okhttp3.Request;
import okhttp3.RequestBody;
import okhttp3.Response;
import org.ruoyi.common.core.domain.R;
import org.ruoyi.domain.PdfFileContentResult;
import org.ruoyi.service.PdfImageExtractService;
import org.ruoyi.utils.ZipUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
/**
* PDF图片提取服务实现类
*/
@Service
@Slf4j
public class PdfImageExtractServiceImpl implements PdfImageExtractService {
@Value("${pdf.extract.service.url}")
private String serviceUrl;
@Value("${pdf.extract.ai-api.url}")
private String aiApiUrl;
@Value("${pdf.extract.ai-api.key}")
private String aiApiKey ;
private final OkHttpClient client = new Builder()
.connectTimeout(100, TimeUnit.SECONDS)
.readTimeout(150, TimeUnit.SECONDS)
.writeTimeout(150, TimeUnit.SECONDS)
.callTimeout(300, TimeUnit.SECONDS)
.build();
private static final MediaType JSON = MediaType.parse("application/json; charset=utf-8");
@Override
public byte[] extractImages(MultipartFile pdfFile, String imageFormat, boolean allowDuplicates)
throws IOException {
// 构建multipart请求
RequestBody requestBody = new MultipartBody.Builder()
.setType(MultipartBody.FORM)
.addFormDataPart("fileInput", pdfFile.getOriginalFilename(),
RequestBody.create(MediaType.parse("application/pdf"), pdfFile.getBytes()))
.addFormDataPart("format", imageFormat)
.addFormDataPart("allowDuplicates", String.valueOf(allowDuplicates))
.build();
// 创建请求
Request request = new Request.Builder()
.url(serviceUrl + "/api/v1/misc/extract-images")
.post(requestBody)
.build();
// 执行请求
try (Response response = client.newCall(request).execute()) {
if (!response.isSuccessful()) {
throw new IOException("请求失败: " + response.code());
}
return response.body().bytes();
}
}
/**
* 处理文件内容
*
* @param unzip Base64编码的图片数组
* @return 文件内容结果列表
* @throws IOException 如果API调用过程中发生错误
*/
@Override
public List<PdfFileContentResult> dealFileContent(String[] unzip) throws IOException {
List<PdfFileContentResult> results = new ArrayList<>();
int i = 0;
for (String base64Image : unzip) {
// 构建请求JSON
String requestJson = String.format("{"
+ "\"model\": \"gpt-4o\","
+ "\"stream\": false,"
+ "\"messages\": [{"
+ "\"role\": \"user\","
+ "\"content\": [{"
+ "\"type\": \"text\","
+ "\"text\": \"这张图片有什么\""
+ "}, {"
+ "\"type\": \"image_url\","
+ "\"image_url\": {"
+ "\"url\": \"%s\""
+ "}}"
+ "]}],"
+ "\"max_tokens\": 400"
+ "}", base64Image);
// 创建请求
Request request = new Request.Builder()
.url(aiApiUrl)
.addHeader("Authorization", "Bearer " + aiApiKey)
.post(RequestBody.create(JSON, requestJson))
.build();
// 执行请求
try {
log.info("=============call=" + ++i);
Response response = client.newCall(request).execute();
log.info("=============response=" + response);
if (!response.isSuccessful()) {
throw new IOException("API请求失败: " + response.code() + response.toString());
}
String responseBody = response.body().string();
log.info("=============responseBody=" + responseBody);
// 使用文件名这里使用base64的前10个字符作为标识和API返回内容创建结果对象
String filename = base64Image.substring(0, Math.min(base64Image.length(), 10));
results.add(new PdfFileContentResult(filename, responseBody));
} catch (Exception e) {
log.error(e.getMessage());
throw new RuntimeException(e);
}
}
return results;
}
@Override
public List<PdfFileContentResult> extractImages(MultipartFile file) throws IOException {
String format = "png";
boolean allowDuplicates = true;
// 获取ZIP数据
byte[] zipData = this.extractImages(file, format, allowDuplicates);
// 解压文件并识别图片内容并返回
String[] unzip = ZipUtils.unzipForBase64(zipData);
//解析图片内容
return this.dealFileContent(unzip);
}
}