提取PDF中的图片并调用大模型,识别图片内容并返回

2026-04-28 02:56:41 +00:00 · 2025-05-13 10:55:39 +08:00
parent 3666157d14
commit 32da85daab
7 changed files with 441 additions and 98 deletions
--- a/ruoyi-admin/src/main/resources/application-dev.yml
+++ b/ruoyi-admin/src/main/resources/application-dev.yml
@@ -94,3 +94,8 @@ sms:
  # 腾讯专用
  sdkAppId:

+# PDF image-extraction settings read by PdfImageExtractServiceImpl.
+# NOTE(review): that class also injects pdf.extract.ai-api.url and pdf.extract.ai-api.key,
+# which are not defined in this hunk - confirm they are supplied elsewhere.
+pdf:
+  extract:
+    service:
+      # Base URL of the extraction service; "/api/v1/misc/extract-images" is appended to it.
+      url: http://localhost:8080
+
--- a/ruoyi-admin/src/main/resources/application-prod.yml
+++ b/ruoyi-admin/src/main/resources/application-prod.yml
@@ -172,3 +172,8 @@ sms:
  signName: 测试
  # 腾讯专用
  sdkAppId:
+
+# PDF image-extraction settings read by PdfImageExtractServiceImpl.
+# NOTE(review): that class also injects pdf.extract.ai-api.url and pdf.extract.ai-api.key,
+# which are not defined in this hunk - confirm they are supplied elsewhere.
+pdf:
+  extract:
+    service:
+      # Base URL of the extraction service; "/api/v1/misc/extract-images" is appended to it.
+      url: http://localhost:8080
--- a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/domain/PdfFileContentResult.java
+++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/domain/PdfFileContentResult.java
@@ -0,0 +1,30 @@
+package org.ruoyi.domain;
+
+/**
+ * Mutable holder that pairs a file identifier with the content produced for that file.
+ */
+public class PdfFileContentResult {
+    /** Identifier of the file this result belongs to. */
+    private String filename;
+    /** Content associated with the file. */
+    private String content;
+
+    /**
+     * Creates a result with both values set.
+     *
+     * @param filename identifier of the file
+     * @param content  content associated with the file
+     */
+    public PdfFileContentResult(String filename, String content) {
+        this.content = content;
+        this.filename = filename;
+    }
+
+    // ---- read accessors ----
+
+    /** @return the file identifier */
+    public String getFilename() {
+        return this.filename;
+    }
+
+    /** @return the content associated with the file */
+    public String getContent() {
+        return this.content;
+    }
+
+    // ---- write accessors ----
+
+    /** @param filename new file identifier */
+    public void setFilename(String filename) {
+        this.filename = filename;
+    }
+
+    /** @param content new content value */
+    public void setContent(String content) {
+        this.content = content;
+    }
+}
--- a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/PdfImageExtractService.java
+++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/PdfImageExtractService.java
@@ -0,0 +1,41 @@
+package org.ruoyi.service;
+
+import java.io.IOException;
+import java.util.List;
+import org.ruoyi.domain.PdfFileContentResult;
+import org.springframework.web.multipart.MultipartFile;
+
+/**
+ * Service for extracting the images contained in a PDF and obtaining a description of each image.
+ */
+public interface PdfImageExtractService {
+
+  /**
+   * Extracts the images contained in a PDF file.
+   *
+   * @param pdfFile PDF file to process
+   * @param imageFormat output image format (png, jpeg, gif)
+   * @param allowDuplicates whether duplicate images are allowed in the output
+   * @return bytes of a ZIP archive containing the extracted images
+   * @throws IOException if an error occurs while processing the file
+   */
+  byte[] extractImages(MultipartFile pdfFile, String imageFormat, boolean allowDuplicates)
+      throws IOException;
+
+  /**
+   * Processes a batch of images and returns one result per image.
+   *
+   * @param unzip array of Base64-encoded images
+   * @return list of per-image content results
+   * @throws IOException if an error occurs while calling the API
+   */
+  List<PdfFileContentResult> dealFileContent(String[] unzip) throws IOException;
+
+  /**
+   * Extracts the images from a PDF, has a large model recognize each image, and returns the
+   * recognition results.
+   *
+   * @param file PDF file to process
+   * @return list of per-image content results
+   * @throws IOException if extraction or the model call fails
+   */
+  List<PdfFileContentResult> extractImages(MultipartFile file) throws IOException;
+}
--- a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/PdfImageExtractServiceImpl.java
+++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/PdfImageExtractServiceImpl.java
@@ -0,0 +1,144 @@
+package org.ruoyi.service.impl;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
+import lombok.extern.slf4j.Slf4j;
+import okhttp3.MediaType;
+import okhttp3.MultipartBody;
+import okhttp3.OkHttpClient;
+import okhttp3.OkHttpClient.Builder;
+import okhttp3.Request;
+import okhttp3.RequestBody;
+import okhttp3.Response;
+import org.ruoyi.common.core.domain.R;
+import org.ruoyi.domain.PdfFileContentResult;
+import org.ruoyi.service.PdfImageExtractService;
+import org.ruoyi.utils.ZipUtils;
+import org.springframework.beans.factory.annotation.Value;
+import org.springframework.stereotype.Service;
+import org.springframework.web.multipart.MultipartFile;
+
+/**
+ * {@link PdfImageExtractService} implementation that delegates image extraction to a remote HTTP
+ * service and then posts every extracted image to a chat-style model endpoint for recognition.
+ */
+@Service
+@Slf4j
+public class PdfImageExtractServiceImpl implements PdfImageExtractService {
+
+  private static final MediaType JSON = MediaType.parse("application/json; charset=utf-8");
+
+  /**
+   * JSON body sent for each image; the single {@code %s} placeholder receives the image string.
+   * Kept as a constant so it is not rebuilt on every loop iteration.
+   */
+  private static final String IMAGE_REQUEST_TEMPLATE = "{"
+      + "\"model\": \"gpt-4o\","
+      + "\"stream\": false,"
+      + "\"messages\": [{"
+      + "\"role\": \"user\","
+      + "\"content\": [{"
+      + "\"type\": \"text\","
+      + "\"text\": \"这张图片有什么\""
+      + "}, {"
+      + "\"type\": \"image_url\","
+      + "\"image_url\": {"
+      + "\"url\": \"%s\""
+      + "}}"
+      + "]}],"
+      + "\"max_tokens\": 400"
+      + "}";
+
+  // NOTE(review): none of the placeholders below declares a default value; presumably the
+  // application fails to start when one of them is missing - confirm all three are configured.
+
+  /** Base URL of the PDF image-extraction service. */
+  @Value("${pdf.extract.service.url}")
+  private String serviceUrl;
+  /** Full URL of the model endpoint that receives the image recognition requests. */
+  @Value("${pdf.extract.ai-api.url}")
+  private String aiApiUrl;
+  /** Credential sent as a Bearer token to the model endpoint. */
+  @Value("${pdf.extract.ai-api.key}")
+  private String aiApiKey;
+
+  /** Shared HTTP client with generous timeouts for both remote calls. */
+  private final OkHttpClient client = new Builder()
+      .connectTimeout(100, TimeUnit.SECONDS)
+      .readTimeout(150, TimeUnit.SECONDS)
+      .writeTimeout(150, TimeUnit.SECONDS)
+      .callTimeout(300, TimeUnit.SECONDS)
+      .build();
+
+  /**
+   * Uploads the PDF to the extraction service and returns the ZIP archive it responds with.
+   *
+   * @param pdfFile PDF file to upload
+   * @param imageFormat output image format passed to the service as {@code format}
+   * @param allowDuplicates passed to the service as {@code allowDuplicates}
+   * @return raw response body (ZIP archive bytes)
+   * @throws IOException if the upload fails or the service answers with a non-2xx status
+   */
+  @Override
+  public byte[] extractImages(MultipartFile pdfFile, String imageFormat, boolean allowDuplicates)
+      throws IOException {
+    // Multipart form: the PDF itself plus the two extraction options
+    RequestBody requestBody = new MultipartBody.Builder()
+        .setType(MultipartBody.FORM)
+        .addFormDataPart("fileInput", pdfFile.getOriginalFilename(),
+            RequestBody.create(MediaType.parse("application/pdf"), pdfFile.getBytes()))
+        .addFormDataPart("format", imageFormat)
+        .addFormDataPart("allowDuplicates", String.valueOf(allowDuplicates))
+        .build();
+
+    Request request = new Request.Builder()
+        .url(serviceUrl + "/api/v1/misc/extract-images")
+        .post(requestBody)
+        .build();
+
+    try (Response response = client.newCall(request).execute()) {
+      if (!response.isSuccessful()) {
+        throw new IOException("请求失败: " + response.code());
+      }
+      return response.body().bytes();
+    }
+  }
+
+  /**
+   * Sends every image to the model endpoint and collects the raw response bodies.
+   *
+   * <p>Each result's filename is the first (up to) 10 characters of the image string and its
+   * content is the unparsed response body. Any failure is logged and rethrown wrapped in a
+   * {@link RuntimeException}.
+   *
+   * @param unzip array of Base64-encoded images
+   * @return one result per input image, in input order
+   * @throws IOException declared by the interface; failures currently surface as
+   *     {@link RuntimeException} with the original exception as cause
+   */
+  @Override
+  public List<PdfFileContentResult> dealFileContent(String[] unzip) throws IOException {
+    List<PdfFileContentResult> results = new ArrayList<>();
+    int i = 0;
+    for (String base64Image : unzip) {
+      // NOTE(review): the image string is placed into image_url.url unchanged; if the endpoint
+      // expects a "data:image/...;base64," prefix the caller must supply it - confirm.
+      String requestJson = String.format(IMAGE_REQUEST_TEMPLATE, base64Image);
+
+      Request request = new Request.Builder()
+          .url(aiApiUrl)
+          .addHeader("Authorization", "Bearer " + aiApiKey)
+          .post(RequestBody.create(JSON, requestJson))
+          .build();
+
+      log.info("=============call=" + ++i);
+      // try-with-resources guarantees the response (and its pooled connection) is released,
+      // including on the non-2xx path below which throws before the body is consumed.
+      try (Response response = client.newCall(request).execute()) {
+        log.info("=============response=" + response);
+        if (!response.isSuccessful()) {
+          throw new IOException("API请求失败: " + response.code() + response.toString());
+        }
+
+        String responseBody = response.body().string();
+        log.info("=============responseBody=" + responseBody);
+        // NOTE(review): a 10-character prefix is unlikely to distinguish images of the same
+        // format from each other - confirm whether a better identifier is needed.
+        String filename = base64Image.substring(0, Math.min(base64Image.length(), 10));
+        results.add(new PdfFileContentResult(filename, responseBody));
+      } catch (Exception e) {
+        log.error(e.getMessage());
+        throw new RuntimeException(e);
+      }
+    }
+
+    return results;
+  }
+
+  /**
+   * Extracts the PDF's images as PNG (duplicates allowed), unpacks the returned ZIP into
+   * Base64 strings and has each image recognized via {@link #dealFileContent(String[])}.
+   *
+   * @param file PDF file to process
+   * @return one recognition result per extracted image
+   * @throws IOException if extraction or unzipping fails
+   */
+  @Override
+  public List<PdfFileContentResult> extractImages(MultipartFile file) throws IOException {
+    String format = "png";
+    boolean allowDuplicates = true;
+    // Fetch the ZIP archive of extracted images
+    byte[] zipData = this.extractImages(file, format, allowDuplicates);
+    // Unpack every archive entry into a Base64 string
+    String[] unzip = ZipUtils.unzipForBase64(zipData);
+    // Recognize the content of each image
+    return this.dealFileContent(unzip);
+  }
+}
--- a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/utils/ZipUtils.java
+++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/utils/ZipUtils.java
@@ -0,0 +1,95 @@
+package org.ruoyi.utils;
+
+import java.io.BufferedOutputStream;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Base64;
+import java.util.List;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
+
+/**
+ * Helpers for unpacking ZIP archives that are held in memory.
+ */
+public class ZipUtils {
+
+    /**
+     * Extracts a ZIP archive into a directory on disk.
+     *
+     * <p>Every entry is checked to resolve inside {@code destDir}; an entry whose name would
+     * escape it (for example via {@code ../}) aborts the extraction. Missing parent directories
+     * of file entries are created.
+     *
+     * @param zipData bytes of the ZIP archive
+     * @param destDir target directory; created if it does not exist
+     * @return paths of the extracted files, each built as {@code destDir + separator + entryName}
+     * @throws IOException if an entry points outside {@code destDir}, a directory cannot be
+     *     created, or reading/writing fails
+     */
+    public static String[] unzip(byte[] zipData, String destDir) throws IOException {
+        File destDirFile = new File(destDir);
+        ensureDirectory(destDirFile);
+
+        // Canonical form of the target, with a trailing separator so that "/tmp/out" does not
+        // accept "/tmp/out-evil/..." as being inside it.
+        String destRoot = destDirFile.getCanonicalPath();
+        String destPrefix = destRoot.endsWith(File.separator) ? destRoot : destRoot + File.separator;
+
+        List<String> extractedPaths = new ArrayList<>();
+        try (ZipInputStream zis = new ZipInputStream(new ByteArrayInputStream(zipData))) {
+            ZipEntry zipEntry;
+            while ((zipEntry = zis.getNextEntry()) != null) {
+                String filePath = destDir + File.separator + zipEntry.getName();
+                File target = new File(filePath);
+
+                // Zip Slip guard: reject entries that resolve outside the target directory
+                String canonicalTarget = target.getCanonicalPath();
+                if (!canonicalTarget.equals(destRoot) && !canonicalTarget.startsWith(destPrefix)) {
+                    throw new IOException(
+                        "ZIP entry resolves outside of the target directory: " + zipEntry.getName());
+                }
+
+                if (zipEntry.isDirectory()) {
+                    ensureDirectory(target);
+                } else {
+                    // Archives are not required to list directories before the files in them
+                    File parent = target.getParentFile();
+                    if (parent != null) {
+                        ensureDirectory(parent);
+                    }
+                    extractFile(zis, filePath);
+                    extractedPaths.add(filePath);
+                }
+                zis.closeEntry();
+            }
+        }
+        return extractedPaths.toArray(new String[0]);
+    }
+
+    /** Creates {@code dir} (and parents) if needed; fails if it still is not a directory. */
+    private static void ensureDirectory(File dir) throws IOException {
+        if (!dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()) {
+            throw new IOException("Unable to create directory: " + dir);
+        }
+    }
+
+    /** Writes the remaining bytes of the current ZIP entry to {@code filePath}. */
+    private static void extractFile(ZipInputStream zis, String filePath) throws IOException {
+        try (BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(filePath))) {
+            // The stream reports end-of-stream at the end of the current entry
+            zis.transferTo(bos);
+        }
+    }
+
+    /**
+     * Unpacks a ZIP archive in memory and returns each file entry as a Base64 string.
+     *
+     * @param zipData bytes of the ZIP archive
+     * @return Base64-encoded content of every non-directory entry, in archive order
+     * @throws IOException if reading the archive fails
+     */
+    public static String[] unzipForBase64(byte[] zipData) throws IOException {
+        List<String> base64Contents = new ArrayList<>();
+        try (ZipInputStream zis = new ZipInputStream(new ByteArrayInputStream(zipData))) {
+            ZipEntry zipEntry;
+            while ((zipEntry = zis.getNextEntry()) != null) {
+                if (!zipEntry.isDirectory()) {
+                    // Reads up to the end of the current entry only
+                    byte[] entryBytes = zis.readAllBytes();
+                    base64Contents.add(Base64.getEncoder().encodeToString(entryBytes));
+                }
+                zis.closeEntry();
+            }
+        }
+        return base64Contents.toArray(new String[0]);
+    }
+}
--- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/controller/knowledge/KnowledgeController.java
+++ b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/controller/knowledge/KnowledgeController.java
@@ -1,9 +1,12 @@
 package org.ruoyi.chat.controller.knowledge;

 import cn.dev33.satoken.stp.StpUtil;
+import io.swagger.v3.oas.annotations.Operation;
+import io.swagger.v3.oas.annotations.Parameter;
 import jakarta.servlet.http.HttpServletResponse;
 import jakarta.validation.constraints.NotEmpty;
 import jakarta.validation.constraints.NotNull;
+import java.io.IOException;
 import lombok.RequiredArgsConstructor;
 import org.ruoyi.common.core.domain.R;
 import org.ruoyi.common.core.validate.AddGroup;
@@ -14,6 +17,7 @@ import org.ruoyi.common.satoken.utils.LoginHelper;
 import org.ruoyi.common.web.core.BaseController;
 import org.ruoyi.core.page.PageQuery;
 import org.ruoyi.core.page.TableDataInfo;
+import org.ruoyi.domain.PdfFileContentResult;
 import org.ruoyi.domain.bo.KnowledgeAttachBo;
 import org.ruoyi.domain.bo.KnowledgeFragmentBo;
 import org.ruoyi.domain.bo.KnowledgeInfoBo;
@@ -24,6 +28,7 @@ import org.ruoyi.domain.vo.KnowledgeInfoVo;
 import org.ruoyi.service.IKnowledgeAttachService;
 import org.ruoyi.service.IKnowledgeFragmentService;
 import org.ruoyi.service.IKnowledgeInfoService;
+import org.ruoyi.service.PdfImageExtractService;
 import org.springframework.validation.annotation.Validated;
 import org.springframework.web.bind.annotation.*;
 import org.springframework.web.multipart.MultipartFile;
@@ -41,117 +46,135 @@ import java.util.List;
@RequestMapping("/knowledge")
 public class KnowledgeController extends BaseController {

-    private final IKnowledgeInfoService knowledgeInfoService;
+  private final IKnowledgeInfoService knowledgeInfoService;

-    private final IKnowledgeAttachService attachService;
+  private final IKnowledgeAttachService attachService;

-    private final IKnowledgeFragmentService fragmentService;
+  private final IKnowledgeFragmentService fragmentService;

-    /**
-     * 根据用户信息查询本地知识库
-     */
-    @GetMapping("/list")
-    public TableDataInfo<KnowledgeInfoVo> list(KnowledgeInfoBo bo, PageQuery pageQuery) {
-        if (!StpUtil.isLogin()) {
-            throw new SecurityException("请先去登录!");
-        }
-        bo.setUid(LoginHelper.getUserId());
-        return knowledgeInfoService.queryPageList(bo, pageQuery);
+  private final PdfImageExtractService pdfImageExtractService;
+
+  /**
+   * 根据用户信息查询本地知识库
+   */
+  @GetMapping("/list")
+  public TableDataInfo<KnowledgeInfoVo> list(KnowledgeInfoBo bo, PageQuery pageQuery) {
+    if (!StpUtil.isLogin()) {
+      throw new SecurityException("请先去登录!");
    }
+    bo.setUid(LoginHelper.getUserId());
+    return knowledgeInfoService.queryPageList(bo, pageQuery);
+  }

-    /**
-     * 新增知识库
-     */
-    @Log(title = "知识库", businessType = BusinessType.INSERT)
-    @PostMapping("/save")
-    public R<Void> save(@Validated(AddGroup.class) @RequestBody KnowledgeInfoBo bo) {
-        knowledgeInfoService.saveOne(bo);
-        return R.ok();
-    }
+  /**
+   * 新增知识库
+   */
+  @Log(title = "知识库", businessType = BusinessType.INSERT)
+  @PostMapping("/save")
+  public R<Void> save(@Validated(AddGroup.class) @RequestBody KnowledgeInfoBo bo) {
+    knowledgeInfoService.saveOne(bo);
+    return R.ok();
+  }

-    /**
-     * 删除知识库
-     */
-    @PostMapping("/remove/{id}")
-    public R<String> remove(@PathVariable String id) {
-        knowledgeInfoService.removeKnowledge(id);
-        return R.ok("删除知识库成功!");
-    }
+  /**
+   * 删除知识库
+   */
+  @PostMapping("/remove/{id}")
+  public R<String> remove(@PathVariable String id) {
+    knowledgeInfoService.removeKnowledge(id);
+    return R.ok("删除知识库成功!");
+  }

-    /**
-     * 修改知识库
-     */
-    @Log(title = "知识库", businessType = BusinessType.UPDATE)
-    @PostMapping("/edit")
-    public R<Void> edit(@RequestBody KnowledgeInfoBo bo) {
-        return toAjax(knowledgeInfoService.updateByBo(bo));
-    }
+  /**
+   * 修改知识库
+   */
+  @Log(title = "知识库", businessType = BusinessType.UPDATE)
+  @PostMapping("/edit")
+  public R<Void> edit(@RequestBody KnowledgeInfoBo bo) {
+    return toAjax(knowledgeInfoService.updateByBo(bo));
+  }

-    /**
-     * 导出知识库列表
-     */
-    @Log(title = "知识库", businessType = BusinessType.EXPORT)
-    @PostMapping("/export")
-    public void export(KnowledgeInfoBo bo, HttpServletResponse response) {
-        List<KnowledgeInfoVo> list = knowledgeInfoService.queryList(bo);
-        ExcelUtil.exportExcel(list, "知识库", KnowledgeInfoVo.class, response);
-    }
+  /**
+   * 导出知识库列表
+   */
+  @Log(title = "知识库", businessType = BusinessType.EXPORT)
+  @PostMapping("/export")
+  public void export(KnowledgeInfoBo bo, HttpServletResponse response) {
+    List<KnowledgeInfoVo> list = knowledgeInfoService.queryList(bo);
+    ExcelUtil.exportExcel(list, "知识库", KnowledgeInfoVo.class, response);
+  }

-    /**
-     * 查询知识附件信息
-     */
-    @GetMapping("/detail/{kid}")
-    public TableDataInfo<KnowledgeAttachVo> attach(KnowledgeAttachBo bo, PageQuery pageQuery, @PathVariable String kid) {
-        bo.setKid(kid);
-        return attachService.queryPageList(bo, pageQuery);
-    }
+  /**
+   * 查询知识附件信息
+   */
+  @GetMapping("/detail/{kid}")
+  public TableDataInfo<KnowledgeAttachVo> attach(KnowledgeAttachBo bo, PageQuery pageQuery,
+      @PathVariable String kid) {
+    bo.setKid(kid);
+    return attachService.queryPageList(bo, pageQuery);
+  }

-    /**
-     * 上传知识库附件
-     */
-    @PostMapping(value = "/attach/upload")
-    public R<String> upload(KnowledgeInfoUploadBo bo) {
-        knowledgeInfoService.upload(bo);
-        return R.ok("上传知识库附件成功!");
-    }
+  /**
+   * 上传知识库附件
+   */
+  @PostMapping(value = "/attach/upload")
+  public R<String> upload(KnowledgeInfoUploadBo bo) {
+    knowledgeInfoService.upload(bo);
+    return R.ok("上传知识库附件成功!");
+  }

-    /**
-     * 获取知识库附件详细信息
-     *
-     * @param id 主键
-     */
-    @GetMapping("attach/info/{id}")
-    public R<KnowledgeAttachVo> getAttachInfo(@NotNull(message = "主键不能为空")
-                                              @PathVariable Long id) {
-        return R.ok(attachService.queryById(id));
-    }
+  /**
+   * 获取知识库附件详细信息
+   *
+   * @param id 主键
+   */
+  @GetMapping("attach/info/{id}")
+  public R<KnowledgeAttachVo> getAttachInfo(@NotNull(message = "主键不能为空")
+  @PathVariable Long id) {
+    return R.ok(attachService.queryById(id));
+  }

-    /**
-     * 删除知识库附件
-     */
-    @PostMapping("attach/remove/{kid}")
-    public R<Void> removeAttach(@NotEmpty(message = "主键不能为空")
-                                @PathVariable String kid) {
-        attachService.removeKnowledgeAttach(kid);
-        return R.ok();
-    }
+  /**
+   * 删除知识库附件
+   */
+  @PostMapping("attach/remove/{kid}")
+  public R<Void> removeAttach(@NotEmpty(message = "主键不能为空")
+  @PathVariable String kid) {
+    attachService.removeKnowledgeAttach(kid);
+    return R.ok();
+  }


-    /**
-     * 查询知识片段
-     */
-    @GetMapping("/fragment/list/{docId}")
-    public TableDataInfo<KnowledgeFragmentVo> fragmentList(KnowledgeFragmentBo bo, PageQuery pageQuery, @PathVariable String docId) {
-        bo.setDocId(docId);
-        return fragmentService.queryPageList(bo, pageQuery);
-    }
+  /**
+   * 查询知识片段
+   */
+  @GetMapping("/fragment/list/{docId}")
+  public TableDataInfo<KnowledgeFragmentVo> fragmentList(KnowledgeFragmentBo bo,
+      PageQuery pageQuery, @PathVariable String docId) {
+    bo.setDocId(docId);
+    return fragmentService.queryPageList(bo, pageQuery);
+  }

-    /**
-     * 上传文件翻译
-     */
-    @PostMapping("/translationByFile")
-    @ResponseBody
-    public String translationByFile(@RequestParam("file") MultipartFile file, String targetLanguage) {
-        return attachService.translationByFile(file, targetLanguage);
-    }
+  /**
+   * 上传文件翻译
+   */
+  @PostMapping("/translationByFile")
+  @ResponseBody
+  public String translationByFile(@RequestParam("file") MultipartFile file, String targetLanguage) {
+    return attachService.translationByFile(file, targetLanguage);
+  }
+
+  /**
+   * Extracts the images from an uploaded PDF and returns what the large model recognized in
+   * each of them.
+   *
+   * @param file uploaded PDF, bound from the multipart part named "file"
+   * @return success response wrapping the per-image recognition results
+   * @throws IOException propagated from the extraction service call
+   */
+  @PostMapping("/extract-images")
+  @Operation(summary = "提取PDF中的图片并调用大模型,识别图片内容并返回", description = "提取PDF中的图片并调用gpt-4o-mini,识别图片内容并返回")
+  public R<List<PdfFileContentResult>> extractImages(@RequestPart("file") MultipartFile file)
+      throws IOException {
+    List<PdfFileContentResult> recognized = pdfImageExtractService.extractImages(file);
+    return R.ok(recognized);
+  }
 }