提取PDF中的图片并调用大模型,识别图片内容并返回

This commit is contained in:
zhouweiyi
2025-05-13 10:55:39 +08:00
parent 3666157d14
commit 32da85daab
7 changed files with 441 additions and 98 deletions

View File

@@ -94,3 +94,8 @@ sms:
# 腾讯专用
sdkAppId:
pdf:
extract:
service:
url: http://localhost:8080

View File

@@ -172,3 +172,8 @@ sms:
signName: 测试
# 腾讯专用
sdkAppId:
pdf:
extract:
service:
url: http://localhost:8080

View File

@@ -0,0 +1,30 @@
package org.ruoyi.domain;
/**
 * Result holder pairing an identifier for an extracted file with the textual
 * content produced for that file.
 *
 * <p>This is a plain mutable bean: both properties can be read and replaced
 * after construction.
 */
public class PdfFileContentResult {

    /** Identifier (name) of the file this result belongs to. */
    private String filename;

    /** Content associated with the file. */
    private String content;

    /**
     * Creates a result with both properties populated.
     *
     * @param filename identifier of the file
     * @param content  content associated with the file
     */
    public PdfFileContentResult(String filename, String content) {
        this.content = content;
        this.filename = filename;
    }

    /**
     * @return the file identifier
     */
    public String getFilename() {
        return this.filename;
    }

    /**
     * @return the content associated with the file
     */
    public String getContent() {
        return this.content;
    }

    /**
     * @param filename the new file identifier
     */
    public void setFilename(String filename) {
        this.filename = filename;
    }

    /**
     * @param content the new content
     */
    public void setContent(String content) {
        this.content = content;
    }
}

View File

@@ -0,0 +1,41 @@
package org.ruoyi.service;
import java.io.IOException;
import java.util.List;
import org.ruoyi.domain.PdfFileContentResult;
import org.springframework.web.multipart.MultipartFile;
/**
* Service contract for extracting the images embedded in a PDF and for
* obtaining a content result for each extracted image.
*/
public interface PdfImageExtractService {
/**
* Extracts the images contained in a PDF file.
*
* @param pdfFile the PDF file
* @param imageFormat output image format (png, jpeg, gif)
* @param allowDuplicates whether duplicate images are allowed in the output
* @return byte array of a ZIP file containing the extracted images
* @throws IOException if an error occurs while processing the file
*/
byte[] extractImages(MultipartFile pdfFile, String imageFormat, boolean allowDuplicates)
throws IOException;
/**
* Processes file content, producing one result per input image.
*
* @param unzip array of Base64-encoded images
* @return list of file content results
* @throws IOException if an error occurs during the API call
*/
List<PdfFileContentResult> dealFileContent(String[] unzip) throws IOException;
/**
* Extracts the images from a PDF, has a large model recognise each image,
* and returns the recognised content.
*
* <p>NOTE(review): the original comment names "gpt-4o-mini"; the model is not
* fixed by this interface and is chosen by the implementation - confirm.
*
* @param file the PDF file
* @return list of file content results, one per extracted image
* @throws IOException if extraction or recognition fails
*/
List<PdfFileContentResult> extractImages(MultipartFile file) throws IOException;
}

View File

@@ -0,0 +1,144 @@
package org.ruoyi.service.impl;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import lombok.extern.slf4j.Slf4j;
import okhttp3.MediaType;
import okhttp3.MultipartBody;
import okhttp3.OkHttpClient;
import okhttp3.OkHttpClient.Builder;
import okhttp3.Request;
import okhttp3.RequestBody;
import okhttp3.Response;
import org.ruoyi.common.core.domain.R;
import org.ruoyi.domain.PdfFileContentResult;
import org.ruoyi.service.PdfImageExtractService;
import org.ruoyi.utils.ZipUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
/**
* PDF图片提取服务实现类
*/
@Service
@Slf4j
public class PdfImageExtractServiceImpl implements PdfImageExtractService {
@Value("${pdf.extract.service.url}")
private String serviceUrl;
@Value("${pdf.extract.ai-api.url}")
private String aiApiUrl;
@Value("${pdf.extract.ai-api.key}")
private String aiApiKey ;
private final OkHttpClient client = new Builder()
.connectTimeout(100, TimeUnit.SECONDS)
.readTimeout(150, TimeUnit.SECONDS)
.writeTimeout(150, TimeUnit.SECONDS)
.callTimeout(300, TimeUnit.SECONDS)
.build();
private static final MediaType JSON = MediaType.parse("application/json; charset=utf-8");
@Override
public byte[] extractImages(MultipartFile pdfFile, String imageFormat, boolean allowDuplicates)
throws IOException {
// 构建multipart请求
RequestBody requestBody = new MultipartBody.Builder()
.setType(MultipartBody.FORM)
.addFormDataPart("fileInput", pdfFile.getOriginalFilename(),
RequestBody.create(MediaType.parse("application/pdf"), pdfFile.getBytes()))
.addFormDataPart("format", imageFormat)
.addFormDataPart("allowDuplicates", String.valueOf(allowDuplicates))
.build();
// 创建请求
Request request = new Request.Builder()
.url(serviceUrl + "/api/v1/misc/extract-images")
.post(requestBody)
.build();
// 执行请求
try (Response response = client.newCall(request).execute()) {
if (!response.isSuccessful()) {
throw new IOException("请求失败: " + response.code());
}
return response.body().bytes();
}
}
/**
* 处理文件内容
*
* @param unzip Base64编码的图片数组
* @return 文件内容结果列表
* @throws IOException 如果API调用过程中发生错误
*/
@Override
public List<PdfFileContentResult> dealFileContent(String[] unzip) throws IOException {
List<PdfFileContentResult> results = new ArrayList<>();
int i = 0;
for (String base64Image : unzip) {
// 构建请求JSON
String requestJson = String.format("{"
+ "\"model\": \"gpt-4o\","
+ "\"stream\": false,"
+ "\"messages\": [{"
+ "\"role\": \"user\","
+ "\"content\": [{"
+ "\"type\": \"text\","
+ "\"text\": \"这张图片有什么\""
+ "}, {"
+ "\"type\": \"image_url\","
+ "\"image_url\": {"
+ "\"url\": \"%s\""
+ "}}"
+ "]}],"
+ "\"max_tokens\": 400"
+ "}", base64Image);
// 创建请求
Request request = new Request.Builder()
.url(aiApiUrl)
.addHeader("Authorization", "Bearer " + aiApiKey)
.post(RequestBody.create(JSON, requestJson))
.build();
// 执行请求
try {
log.info("=============call=" + ++i);
Response response = client.newCall(request).execute();
log.info("=============response=" + response);
if (!response.isSuccessful()) {
throw new IOException("API请求失败: " + response.code() + response.toString());
}
String responseBody = response.body().string();
log.info("=============responseBody=" + responseBody);
// 使用文件名这里使用base64的前10个字符作为标识和API返回内容创建结果对象
String filename = base64Image.substring(0, Math.min(base64Image.length(), 10));
results.add(new PdfFileContentResult(filename, responseBody));
} catch (Exception e) {
log.error(e.getMessage());
throw new RuntimeException(e);
}
}
return results;
}
@Override
public List<PdfFileContentResult> extractImages(MultipartFile file) throws IOException {
String format = "png";
boolean allowDuplicates = true;
// 获取ZIP数据
byte[] zipData = this.extractImages(file, format, allowDuplicates);
// 解压文件并识别图片内容并返回
String[] unzip = ZipUtils.unzipForBase64(zipData);
//解析图片内容
return this.dealFileContent(unzip);
}
}

View File

@@ -0,0 +1,95 @@
package org.ruoyi.utils;
import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
/**
 * Utilities for unpacking ZIP archives that are held in memory.
 */
public class ZipUtils {

    private static final int BUFFER_SIZE = 4096;

    /**
     * Unpacks a ZIP archive into a directory.
     *
     * <p>Every entry is resolved against {@code destDir} and rejected if its canonical path
     * falls outside that directory (e.g. names containing {@code ../}), so a crafted archive
     * cannot write elsewhere on the file system. Parent directories of file entries are created
     * on demand, because archives are not required to contain explicit directory entries.
     *
     * @param zipData bytes of the ZIP archive
     * @param destDir target directory; created if it does not exist
     * @return paths of the extracted files, each formed as
     *         {@code destDir + File.separator + entryName}, in archive order
     * @throws IOException if a directory cannot be created, an entry would escape
     *                     {@code destDir}, or reading/writing fails
     */
    public static String[] unzip(byte[] zipData, String destDir) throws IOException {
        File destDirFile = new File(destDir);
        ensureDirectory(destDirFile);

        // Canonical form (symlinks and ".." resolved) is what containment is checked against.
        String canonicalDest = destDirFile.getCanonicalPath();
        String canonicalPrefix = canonicalDest.endsWith(File.separator)
            ? canonicalDest
            : canonicalDest + File.separator;

        List<String> extractedPaths = new ArrayList<>();
        try (ByteArrayInputStream bis = new ByteArrayInputStream(zipData);
             ZipInputStream zis = new ZipInputStream(bis)) {
            ZipEntry zipEntry;
            while ((zipEntry = zis.getNextEntry()) != null) {
                String filePath = destDir + File.separator + zipEntry.getName();
                File target = new File(filePath);

                String canonicalTarget = target.getCanonicalPath();
                boolean insideDest = canonicalTarget.equals(canonicalDest)
                    || canonicalTarget.startsWith(canonicalPrefix);
                if (!insideDest) {
                    throw new IOException(
                        "ZIP entry is outside of the target directory: " + zipEntry.getName());
                }

                if (zipEntry.isDirectory()) {
                    ensureDirectory(target);
                } else {
                    File parent = target.getParentFile();
                    if (parent != null) {
                        ensureDirectory(parent);
                    }
                    extractFile(zis, filePath);
                    extractedPaths.add(filePath);
                }
                zis.closeEntry();
            }
        }
        return extractedPaths.toArray(new String[0]);
    }

    /**
     * Creates {@code dir} (and missing parents) unless it already is a directory.
     * The trailing re-check tolerates another thread creating it concurrently.
     */
    private static void ensureDirectory(File dir) throws IOException {
        if (!dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("Unable to create directory: " + dir);
        }
    }

    /** Copies the current entry of {@code zis} into the file at {@code filePath}. */
    private static void extractFile(ZipInputStream zis, String filePath) throws IOException {
        try (BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(filePath))) {
            byte[] buffer = new byte[BUFFER_SIZE];
            int read;
            while ((read = zis.read(buffer)) != -1) {
                bos.write(buffer, 0, read);
            }
        }
    }

    /**
     * Reads every file entry of a ZIP archive and returns its content Base64-encoded.
     * Directory entries are skipped; nothing is written to disk.
     *
     * @param zipData bytes of the ZIP archive
     * @return Base64 (standard alphabet, padded) content of each file entry, in archive order
     * @throws IOException if the archive cannot be read
     */
    public static String[] unzipForBase64(byte[] zipData) throws IOException {
        List<String> base64Contents = new ArrayList<>();
        try (ByteArrayInputStream bis = new ByteArrayInputStream(zipData);
             ZipInputStream zis = new ZipInputStream(bis)) {
            ZipEntry zipEntry;
            while ((zipEntry = zis.getNextEntry()) != null) {
                if (!zipEntry.isDirectory()) {
                    // Buffer the whole entry in memory before encoding it.
                    ByteArrayOutputStream baos = new ByteArrayOutputStream();
                    byte[] buffer = new byte[BUFFER_SIZE];
                    int read;
                    while ((read = zis.read(buffer)) != -1) {
                        baos.write(buffer, 0, read);
                    }
                    base64Contents.add(Base64.getEncoder().encodeToString(baos.toByteArray()));
                }
                zis.closeEntry();
            }
        }
        return base64Contents.toArray(new String[0]);
    }
}

View File

@@ -1,9 +1,12 @@
package org.ruoyi.chat.controller.knowledge;
import cn.dev33.satoken.stp.StpUtil;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import jakarta.servlet.http.HttpServletResponse;
import jakarta.validation.constraints.NotEmpty;
import jakarta.validation.constraints.NotNull;
import java.io.IOException;
import lombok.RequiredArgsConstructor;
import org.ruoyi.common.core.domain.R;
import org.ruoyi.common.core.validate.AddGroup;
@@ -14,6 +17,7 @@ import org.ruoyi.common.satoken.utils.LoginHelper;
import org.ruoyi.common.web.core.BaseController;
import org.ruoyi.core.page.PageQuery;
import org.ruoyi.core.page.TableDataInfo;
import org.ruoyi.domain.PdfFileContentResult;
import org.ruoyi.domain.bo.KnowledgeAttachBo;
import org.ruoyi.domain.bo.KnowledgeFragmentBo;
import org.ruoyi.domain.bo.KnowledgeInfoBo;
@@ -24,6 +28,7 @@ import org.ruoyi.domain.vo.KnowledgeInfoVo;
import org.ruoyi.service.IKnowledgeAttachService;
import org.ruoyi.service.IKnowledgeFragmentService;
import org.ruoyi.service.IKnowledgeInfoService;
import org.ruoyi.service.PdfImageExtractService;
import org.springframework.validation.annotation.Validated;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
@@ -41,117 +46,135 @@ import java.util.List;
@RequestMapping("/knowledge")
public class KnowledgeController extends BaseController {
private final IKnowledgeInfoService knowledgeInfoService;
private final IKnowledgeInfoService knowledgeInfoService;
private final IKnowledgeAttachService attachService;
private final IKnowledgeAttachService attachService;
private final IKnowledgeFragmentService fragmentService;
private final IKnowledgeFragmentService fragmentService;
/**
* 根据用户信息查询本地知识库
*/
@GetMapping("/list")
public TableDataInfo<KnowledgeInfoVo> list(KnowledgeInfoBo bo, PageQuery pageQuery) {
if (!StpUtil.isLogin()) {
throw new SecurityException("请先去登录!");
}
bo.setUid(LoginHelper.getUserId());
return knowledgeInfoService.queryPageList(bo, pageQuery);
private final PdfImageExtractService pdfImageExtractService;
/**
* 根据用户信息查询本地知识库
*/
@GetMapping("/list")
public TableDataInfo<KnowledgeInfoVo> list(KnowledgeInfoBo bo, PageQuery pageQuery) {
if (!StpUtil.isLogin()) {
throw new SecurityException("请先去登录!");
}
bo.setUid(LoginHelper.getUserId());
return knowledgeInfoService.queryPageList(bo, pageQuery);
}
/**
* 新增知识库
*/
@Log(title = "知识库", businessType = BusinessType.INSERT)
@PostMapping("/save")
public R<Void> save(@Validated(AddGroup.class) @RequestBody KnowledgeInfoBo bo) {
knowledgeInfoService.saveOne(bo);
return R.ok();
}
/**
* 新增知识库
*/
@Log(title = "知识库", businessType = BusinessType.INSERT)
@PostMapping("/save")
public R<Void> save(@Validated(AddGroup.class) @RequestBody KnowledgeInfoBo bo) {
knowledgeInfoService.saveOne(bo);
return R.ok();
}
/**
* 删除知识库
*/
@PostMapping("/remove/{id}")
public R<String> remove(@PathVariable String id) {
knowledgeInfoService.removeKnowledge(id);
return R.ok("删除知识库成功!");
}
/**
* 删除知识库
*/
@PostMapping("/remove/{id}")
public R<String> remove(@PathVariable String id) {
knowledgeInfoService.removeKnowledge(id);
return R.ok("删除知识库成功!");
}
/**
* 修改知识库
*/
@Log(title = "知识库", businessType = BusinessType.UPDATE)
@PostMapping("/edit")
public R<Void> edit(@RequestBody KnowledgeInfoBo bo) {
return toAjax(knowledgeInfoService.updateByBo(bo));
}
/**
* 修改知识库
*/
@Log(title = "知识库", businessType = BusinessType.UPDATE)
@PostMapping("/edit")
public R<Void> edit(@RequestBody KnowledgeInfoBo bo) {
return toAjax(knowledgeInfoService.updateByBo(bo));
}
/**
* 导出知识库列表
*/
@Log(title = "知识库", businessType = BusinessType.EXPORT)
@PostMapping("/export")
public void export(KnowledgeInfoBo bo, HttpServletResponse response) {
List<KnowledgeInfoVo> list = knowledgeInfoService.queryList(bo);
ExcelUtil.exportExcel(list, "知识库", KnowledgeInfoVo.class, response);
}
/**
* 导出知识库列表
*/
@Log(title = "知识库", businessType = BusinessType.EXPORT)
@PostMapping("/export")
public void export(KnowledgeInfoBo bo, HttpServletResponse response) {
List<KnowledgeInfoVo> list = knowledgeInfoService.queryList(bo);
ExcelUtil.exportExcel(list, "知识库", KnowledgeInfoVo.class, response);
}
/**
* 查询知识附件信息
*/
@GetMapping("/detail/{kid}")
public TableDataInfo<KnowledgeAttachVo> attach(KnowledgeAttachBo bo, PageQuery pageQuery, @PathVariable String kid) {
bo.setKid(kid);
return attachService.queryPageList(bo, pageQuery);
}
/**
* 查询知识附件信息
*/
@GetMapping("/detail/{kid}")
public TableDataInfo<KnowledgeAttachVo> attach(KnowledgeAttachBo bo, PageQuery pageQuery,
@PathVariable String kid) {
bo.setKid(kid);
return attachService.queryPageList(bo, pageQuery);
}
/**
* 上传知识库附件
*/
@PostMapping(value = "/attach/upload")
public R<String> upload(KnowledgeInfoUploadBo bo) {
knowledgeInfoService.upload(bo);
return R.ok("上传知识库附件成功!");
}
/**
* 上传知识库附件
*/
@PostMapping(value = "/attach/upload")
public R<String> upload(KnowledgeInfoUploadBo bo) {
knowledgeInfoService.upload(bo);
return R.ok("上传知识库附件成功!");
}
/**
* 获取知识库附件详细信息
*
* @param id 主键
*/
@GetMapping("attach/info/{id}")
public R<KnowledgeAttachVo> getAttachInfo(@NotNull(message = "主键不能为空")
@PathVariable Long id) {
return R.ok(attachService.queryById(id));
}
/**
* 获取知识库附件详细信息
*
* @param id 主键
*/
@GetMapping("attach/info/{id}")
public R<KnowledgeAttachVo> getAttachInfo(@NotNull(message = "主键不能为空")
@PathVariable Long id) {
return R.ok(attachService.queryById(id));
}
/**
* 删除知识库附件
*/
@PostMapping("attach/remove/{kid}")
public R<Void> removeAttach(@NotEmpty(message = "主键不能为空")
@PathVariable String kid) {
attachService.removeKnowledgeAttach(kid);
return R.ok();
}
/**
* 删除知识库附件
*/
@PostMapping("attach/remove/{kid}")
public R<Void> removeAttach(@NotEmpty(message = "主键不能为空")
@PathVariable String kid) {
attachService.removeKnowledgeAttach(kid);
return R.ok();
}
/**
* 查询知识片段
*/
@GetMapping("/fragment/list/{docId}")
public TableDataInfo<KnowledgeFragmentVo> fragmentList(KnowledgeFragmentBo bo, PageQuery pageQuery, @PathVariable String docId) {
bo.setDocId(docId);
return fragmentService.queryPageList(bo, pageQuery);
}
/**
* 查询知识片段
*/
@GetMapping("/fragment/list/{docId}")
public TableDataInfo<KnowledgeFragmentVo> fragmentList(KnowledgeFragmentBo bo,
PageQuery pageQuery, @PathVariable String docId) {
bo.setDocId(docId);
return fragmentService.queryPageList(bo, pageQuery);
}
/**
* 上传文件翻译
*/
@PostMapping("/translationByFile")
@ResponseBody
public String translationByFile(@RequestParam("file") MultipartFile file, String targetLanguage) {
return attachService.translationByFile(file, targetLanguage);
}
/**
* 上传文件翻译
*/
@PostMapping("/translationByFile")
@ResponseBody
public String translationByFile(@RequestParam("file") MultipartFile file, String targetLanguage) {
return attachService.translationByFile(file, targetLanguage);
}
/**
 * Accepts an uploaded PDF, delegates to {@code pdfImageExtractService} to extract its
 * images and have their content recognised, and wraps the outcome in a success response.
 *
 * @param file the uploaded PDF, bound from the multipart part named {@code file}
 * @return success response carrying one {@link PdfFileContentResult} per processed image
 * @throws IOException propagated from the extraction service
 */
@PostMapping("/extract-images")
@Operation(summary = "提取PDF中的图片并调用大模型,识别图片内容并返回", description = "提取PDF中的图片并调用gpt-4o-mini,识别图片内容并返回")
public R<List<PdfFileContentResult>> extractImages(@RequestPart("file") MultipartFile file)
    throws IOException {
    List<PdfFileContentResult> recognized = pdfImageExtractService.extractImages(file);
    return R.ok(recognized);
}
}