mirror of
https://gitcode.com/ageerle/ruoyi-ai.git
synced 2026-03-23 17:43:43 +08:00
恢复微信模块,优化知识库切片功能
This commit is contained in:
@@ -0,0 +1,41 @@
|
||||
package org.ruoyi.chain.loader;
|
||||
|
||||
import dev.langchain4j.data.document.Document;
|
||||
import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.ruoyi.chain.split.TextSplitter;
|
||||
import org.ruoyi.common.core.exception.UtilException;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.List;
|
||||
@Component
|
||||
@AllArgsConstructor
|
||||
@Slf4j
|
||||
public class ExcelFileLoader implements ResourceLoader {
|
||||
private final TextSplitter textSplitter;
|
||||
private static final int DEFAULT_BUFFER_SIZE = 8192;
|
||||
@Override
|
||||
public String getContent(InputStream inputStream) {
|
||||
// 使用带缓冲的输入流包装(保持原流不自动关闭)
|
||||
try (InputStream bufferedStream = new BufferedInputStream(inputStream, DEFAULT_BUFFER_SIZE)) {
|
||||
ApacheTikaDocumentParser apacheTikaDocumentParser = new ApacheTikaDocumentParser();
|
||||
Document document = apacheTikaDocumentParser.parse(bufferedStream);
|
||||
return document.text();
|
||||
} catch (IOException e) {
|
||||
String errorMsg = "Excel文件流读取失败";
|
||||
throw new UtilException(errorMsg, e);
|
||||
} catch (RuntimeException e) {
|
||||
String errorMsg = "Excel内容解析异常";
|
||||
throw new UtilException(errorMsg, e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getChunkList(String content, String kid) {
|
||||
return textSplitter.split(content, kid);
|
||||
}
|
||||
}
|
||||
@@ -36,6 +36,8 @@ public class ResourceLoaderFactory {
|
||||
return new MarkDownFileLoader(markdownTextSplitter);
|
||||
}else if (FileType.isCodeFile(fileType)) {
|
||||
return new CodeFileLoader(codeTextSplitter);
|
||||
} else if (FileType.isExcel(fileType)) {
|
||||
return new ExcelFileLoader(excelTextSplitter);
|
||||
}else {
|
||||
return new TextFileLoader(characterTextSplitter);
|
||||
}
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
package org.ruoyi.chain.split;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@Component
|
||||
@AllArgsConstructor
|
||||
@Slf4j
|
||||
public class ExcelTextSplitter implements TextSplitter{
|
||||
@Override
|
||||
public List<String> split(String content, String kid) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
@@ -1,23 +0,0 @@
|
||||
package org.ruoyi.service;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
 * Alibaba Bailian (DashScope) API.
 *
 * <p>Created 2025-06-04.
 */
public interface DashscopeService {

    /**
     * Visual reasoning (QVQ) on an image given by address.
     *
     * @param imageUrl address at which the image can be accessed
     * @return the textual result of the call
     * @throws IOException if the call fails
     */
    String qvq(String imageUrl) throws IOException;

    /**
     * Visual reasoning (QVQ) on a local file (input is Base64-encoded data or a local path).
     *
     * @param localPath absolute path of the image file
     * @return the textual result of the call
     * @throws IOException if the call fails
     */
    String qvq4LocalPath(String localPath) throws IOException;
}
|
||||
@@ -1,34 +0,0 @@
|
||||
|
||||
/**
|
||||
* 处理文件内容
|
||||
*
|
||||
* @param unzip Base64编码的图片数组
|
||||
* @return 文件内容结果列表
|
||||
* @throws IOException 如果API调用过程中发生错误
|
||||
*/
|
||||
List<PdfFileContentResult> dealFileContent(String[] unzip) throws IOException;
|
||||
/**
|
||||
*利用百炼接口处理文件内容
|
||||
*
|
||||
* @param imageUrl 传入图片地址
|
||||
* @return 文件内容结果列表
|
||||
* @throws IOException 如果API调用过程中发生错误
|
||||
*/
|
||||
List<PdfFileContentResult> dealFileContent4Dashscope(String imageUrl) throws IOException;
|
||||
|
||||
/**
|
||||
* 利用百炼接口处理文件内容
|
||||
*
|
||||
* 视觉推理(QVQ) 使用本地文件(输入Base64编码或本地路径)
|
||||
* @param localPath 图片文件的绝对路径
|
||||
* @return
|
||||
*/
|
||||
List<PdfFileContentResult> dealFileContent4DashscopeBase64(String localPath)throws IOException;
|
||||
/**
|
||||
* 提取PDF中的图片并调用gpt-4o-mini,识别图片内容并返回
|
||||
* @param file
|
||||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
List<PdfFileContentResult> extractImages(MultipartFile file) throws IOException;
|
||||
}
|
||||
@@ -1,150 +0,0 @@
|
||||
package org.ruoyi.service.impl;
|
||||
|
||||
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversation;
|
||||
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationParam;
|
||||
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationResult;
|
||||
import com.alibaba.dashscope.common.MultiModalMessage;
|
||||
import com.alibaba.dashscope.common.Role;
|
||||
import io.reactivex.Flowable;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Objects;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.ruoyi.domain.PdfFileContentResult;
|
||||
import org.ruoyi.service.DashscopeService;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
/**
|
||||
* @Description: 阿里百炼API
|
||||
* @Date: 2025/6/4 下午2:28
|
||||
*/
|
||||
@Service
|
||||
@Slf4j
|
||||
public class DashscopeServiceImpl implements DashscopeService {
|
||||
|
||||
private boolean isFirstPrint;
|
||||
|
||||
@Value("${dashscope.model}")
|
||||
private String serviceModel;
|
||||
@Value("${dashscope.key}")
|
||||
private String serviceKey;
|
||||
|
||||
/**
|
||||
* 视觉推理(QVQ)
|
||||
* @param imageUrl 图片可访问地址
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public String qvq(String imageUrl) throws IOException {
|
||||
try {
|
||||
// 构建多模态消息
|
||||
MultiModalMessage userMessage = MultiModalMessage.builder()
|
||||
.role(Role.USER.getValue())
|
||||
.content(Arrays.asList(
|
||||
Collections.singletonMap("text", "这张图片有什么"),
|
||||
Collections.singletonMap("image", imageUrl)
|
||||
))
|
||||
.build();
|
||||
|
||||
// 构建请求参数
|
||||
MultiModalConversationParam param = MultiModalConversationParam.builder()
|
||||
.apiKey(serviceKey) // 使用配置文件中的API Key
|
||||
.model(serviceModel)
|
||||
.message(userMessage)
|
||||
.build();
|
||||
|
||||
MultiModalConversation conv = new MultiModalConversation();
|
||||
|
||||
// 调用API
|
||||
Flowable<MultiModalConversationResult> result = conv.streamCall(
|
||||
param);
|
||||
|
||||
StringBuilder reasoningContent = new StringBuilder();
|
||||
StringBuilder finalContent = new StringBuilder();
|
||||
isFirstPrint = true;
|
||||
|
||||
result.blockingForEach(message -> handleGenerationResult(message, reasoningContent, finalContent));
|
||||
|
||||
return finalContent.toString().replaceAll("[\n\r\s]", "");
|
||||
} catch (Exception e) {
|
||||
log.error("调用百炼API失败: {}", e.getMessage(), e);
|
||||
throw new IOException("百炼API调用失败: " + e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
/**
|
||||
* 视觉推理(QVQ) 使用本地文件(输入Base64编码或本地路径)
|
||||
* @param localPath 图片文件的绝对路径
|
||||
* @return
|
||||
*/
|
||||
@Override
|
||||
public String qvq4LocalPath(String localPath) throws IOException {
|
||||
try {
|
||||
// 构建多模态消息
|
||||
String filePath = "file://"+ localPath;
|
||||
log.info("filePath: {}", filePath);
|
||||
MultiModalMessage userMessage = MultiModalMessage.builder().role(Role.USER.getValue())
|
||||
.content(Arrays.asList(new HashMap<String, Object>(){{put("image", filePath);}},
|
||||
new HashMap<String, Object>(){{put("text", "这张图片有什么");}})).build();
|
||||
|
||||
// 构建请求参数
|
||||
MultiModalConversationParam param = MultiModalConversationParam.builder()
|
||||
.apiKey(serviceKey) // 使用配置文件中的API Key
|
||||
.model(serviceModel)
|
||||
.message(userMessage)
|
||||
.build();
|
||||
MultiModalConversation conv = new MultiModalConversation();
|
||||
|
||||
// 调用API
|
||||
Flowable<MultiModalConversationResult> result = conv.streamCall(
|
||||
param);
|
||||
|
||||
StringBuilder reasoningContent = new StringBuilder();
|
||||
StringBuilder finalContent = new StringBuilder();
|
||||
isFirstPrint = true;
|
||||
|
||||
result.blockingForEach(message -> handleGenerationResult(message, reasoningContent, finalContent));
|
||||
|
||||
return finalContent.toString().replaceAll("[\n\r\s]", "");
|
||||
} catch (Exception e) {
|
||||
log.error("调用百炼API失败: {}", e.getMessage(), e);
|
||||
throw new IOException("百炼API调用失败: " + e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private void handleGenerationResult(MultiModalConversationResult message, StringBuilder reasoningContent, StringBuilder finalContent) {
|
||||
String re = message.getOutput().getChoices().get(0).getMessage().getReasoningContent();
|
||||
String reasoning = Objects.isNull(re) ? "" : re;
|
||||
|
||||
List<Map<String, Object>> content = message.getOutput().getChoices().get(0).getMessage()
|
||||
.getContent();
|
||||
if (!reasoning.isEmpty()) {
|
||||
reasoningContent.append(reasoning);
|
||||
}
|
||||
|
||||
if (Objects.nonNull(content) && !content.isEmpty()) {
|
||||
Object text = content.get(0).get("text");
|
||||
finalContent.append(text);
|
||||
}
|
||||
|
||||
// 检查是否是最后一个响应
|
||||
if (message.getOutput().getChoices().get(0).getFinishReason() != null) {
|
||||
// 输出思考过程
|
||||
if (reasoningContent.length() > 0) {
|
||||
System.out.println("====================思考过程====================");
|
||||
System.out.println(reasoningContent.toString());
|
||||
}
|
||||
// 输出完整回复
|
||||
if (finalContent.length() > 0) {
|
||||
System.out.println("====================完整回复====================");
|
||||
System.out.println(finalContent.toString());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,147 +0,0 @@
|
||||
package org.ruoyi.service.impl;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import lombok.Data;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import okhttp3.MediaType;
|
||||
import okhttp3.MultipartBody;
|
||||
import okhttp3.OkHttpClient;
|
||||
import okhttp3.OkHttpClient.Builder;
|
||||
import okhttp3.Request;
|
||||
import okhttp3.RequestBody;
|
||||
import okhttp3.Response;
|
||||
import org.ruoyi.domain.PdfFileContentResult;
|
||||
import org.ruoyi.service.DashscopeService;
|
||||
import org.ruoyi.service.PdfImageExtractService;
|
||||
import org.ruoyi.utils.ZipUtils;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
/**
|
||||
* PDF图片提取服务实现类
|
||||
*/
|
||||
@Service
|
||||
@Slf4j
|
||||
@Data
|
||||
public class PdfImageExtractServiceImpl implements PdfImageExtractService {
|
||||
|
||||
@Value("${pdf.extract.service.url}")
|
||||
private String serviceUrl;
|
||||
@Value("${pdf.extract.ai-api.url}")
|
||||
private String aiApiUrl;
|
||||
@Value("${pdf.extract.ai-api.key}")
|
||||
private String aiApiKey;
|
||||
|
||||
private final OkHttpClient client = new Builder()
|
||||
.connectTimeout(100, TimeUnit.SECONDS)
|
||||
.readTimeout(150, TimeUnit.SECONDS)
|
||||
.writeTimeout(150, TimeUnit.SECONDS)
|
||||
.callTimeout(300, TimeUnit.SECONDS)
|
||||
.build();
|
||||
|
||||
private static final MediaType JSON = MediaType.parse("application/json; charset=utf-8");
|
||||
|
||||
// @Override
|
||||
public byte[] extractImages(MultipartFile pdfFile, String imageFormat, boolean allowDuplicates)
|
||||
throws IOException {
|
||||
// 构建multipart请求
|
||||
RequestBody requestBody = new MultipartBody.Builder()
|
||||
.setType(MultipartBody.FORM)
|
||||
.addFormDataPart("fileInput", pdfFile.getOriginalFilename(),
|
||||
RequestBody.create(MediaType.parse("application/pdf"), pdfFile.getBytes()))
|
||||
.addFormDataPart("format", imageFormat)
|
||||
.addFormDataPart("allowDuplicates", String.valueOf(allowDuplicates))
|
||||
.build();
|
||||
|
||||
// 创建请求
|
||||
Request request = new Request.Builder()
|
||||
.url(serviceUrl + "/api/v1/misc/extract-images")
|
||||
.post(requestBody)
|
||||
.build();
|
||||
|
||||
// 执行请求
|
||||
try (Response response = client.newCall(request).execute()) {
|
||||
if (!response.isSuccessful()) {
|
||||
throw new IOException("请求失败: " + response.code());
|
||||
}
|
||||
return response.body().bytes();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 处理文件内容
|
||||
*
|
||||
* @param unzip Base64编码的图片数组
|
||||
* @return 文件内容结果列表
|
||||
* @throws IOException 如果API调用过程中发生错误
|
||||
*/
|
||||
// @Override
|
||||
public List<PdfFileContentResult> dealFileContent(String[] unzip) throws IOException {
|
||||
List<PdfFileContentResult> results = new ArrayList<>();
|
||||
int i = 0;
|
||||
for (String base64Image : unzip) {
|
||||
// 构建请求JSON
|
||||
String requestJson = String.format("{"
|
||||
+ "\"model\": \"gpt-4o\","
|
||||
+ "\"stream\": false,"
|
||||
+ "\"messages\": [{"
|
||||
+ "\"role\": \"user\","
|
||||
+ "\"content\": [{"
|
||||
+ "\"type\": \"text\","
|
||||
+ "\"text\": \"这张图片有什么\""
|
||||
+ "}, {"
|
||||
+ "\"type\": \"image_url\","
|
||||
+ "\"image_url\": {"
|
||||
+ "\"url\": \"%s\""
|
||||
+ "}}"
|
||||
+ "]}],"
|
||||
+ "\"max_tokens\": 400"
|
||||
+ "}", base64Image);
|
||||
|
||||
// 创建请求
|
||||
Request request = new Request.Builder()
|
||||
.url(aiApiUrl)
|
||||
.addHeader("Authorization", "Bearer " + aiApiKey)
|
||||
.post(RequestBody.create(JSON, requestJson))
|
||||
.build();
|
||||
|
||||
// 执行请求
|
||||
try {
|
||||
log.info("=============call=" + ++i);
|
||||
|
||||
Response response = client.newCall(request).execute();
|
||||
log.info("=============response=" + response);
|
||||
if (!response.isSuccessful()) {
|
||||
throw new IOException("API请求失败: " + response.code() + response.toString());
|
||||
}
|
||||
|
||||
String responseBody = response.body().string();
|
||||
log.info("=============responseBody=" + responseBody);
|
||||
// 使用文件名(这里使用base64的前10个字符作为标识)和API返回内容创建结果对象
|
||||
String filename = base64Image.substring(0, Math.min(base64Image.length(), 10));
|
||||
results.add(new PdfFileContentResult(filename, responseBody));
|
||||
} catch (Exception e) {
|
||||
log.error(e.getMessage());
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
// @Override
|
||||
public List<PdfFileContentResult> extractImages(MultipartFile file) throws IOException {
|
||||
String format = "png";
|
||||
boolean allowDuplicates = true;
|
||||
// 获取ZIP数据
|
||||
byte[] zipData = this.extractImages(file, format, allowDuplicates);
|
||||
// 解压文件并识别图片内容并返回
|
||||
String[] unzip = ZipUtils.unzipForBase64(zipData);
|
||||
//解析图片内容
|
||||
return this.dealFileContent(unzip);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user