mirror of
https://gitcode.com/ageerle/ruoyi-ai.git
synced 2026-03-22 00:53:44 +08:00
feat(更新日志):
更新日志 1. 移除个人微信模块 2. 移除直播模块 3. 移除gpts模块 4. 移除应用商店模块 5. 移除套餐管理模块 6. 移除兑换管理模块 ## 微信相关 小程序相关功能迁移至企业版 微信公众号/微信机器人迁移至企业版 微信支付迁移至企业版 ## 功能模块 智能体模块迁移至企业版 插件管理改为MCP应用并迁移至企业版 知识库: excel解析迁移至企业版 pdf图片解析迁移至企业版 milvus qdrant扩展 迁移至企业版
This commit is contained in:
@@ -1,7 +1,5 @@
|
||||
package org.ruoyi.chain.loader;
|
||||
|
||||
import org.ruoyi.chain.loader.ResourceLoader;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
@@ -1,41 +0,0 @@
|
||||
package org.ruoyi.chain.loader;
|
||||
|
||||
import dev.langchain4j.data.document.Document;
|
||||
import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.ruoyi.chain.split.TextSplitter;
|
||||
import org.ruoyi.common.core.exception.UtilException;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.List;
|
||||
@Component
|
||||
@AllArgsConstructor
|
||||
@Slf4j
|
||||
public class ExcelFileLoader implements ResourceLoader {
|
||||
private final TextSplitter textSplitter;
|
||||
private static final int DEFAULT_BUFFER_SIZE = 8192;
|
||||
@Override
|
||||
public String getContent(InputStream inputStream) {
|
||||
// 使用带缓冲的输入流包装(保持原流不自动关闭)
|
||||
try (InputStream bufferedStream = new BufferedInputStream(inputStream, DEFAULT_BUFFER_SIZE)) {
|
||||
ApacheTikaDocumentParser apacheTikaDocumentParser = new ApacheTikaDocumentParser();
|
||||
Document document = apacheTikaDocumentParser.parse(bufferedStream);
|
||||
return document.text();
|
||||
} catch (IOException e) {
|
||||
String errorMsg = "Excel文件流读取失败";
|
||||
throw new UtilException(errorMsg, e);
|
||||
} catch (RuntimeException e) {
|
||||
String errorMsg = "Excel内容解析异常";
|
||||
throw new UtilException(errorMsg, e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getChunkList(String content, String kid) {
|
||||
return textSplitter.split(content, kid);
|
||||
}
|
||||
}
|
||||
@@ -1,7 +1,5 @@
|
||||
package org.ruoyi.chain.loader;
|
||||
|
||||
import org.ruoyi.chain.loader.ResourceLoader;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
package org.ruoyi.chain.loader;
|
||||
|
||||
import org.ruoyi.chain.loader.ResourceLoader;
|
||||
|
||||
import java.io.InputStream;
|
||||
import java.util.List;
|
||||
|
||||
|
||||
@@ -2,7 +2,6 @@ package org.ruoyi.chain.loader;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.ruoyi.chain.loader.ResourceLoader;
|
||||
import org.ruoyi.chain.split.TextSplitter;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
|
||||
@@ -3,7 +3,6 @@ package org.ruoyi.chain.loader;
|
||||
import lombok.AllArgsConstructor;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.text.PDFTextStripper;
|
||||
import org.ruoyi.chain.loader.ResourceLoader;
|
||||
import org.ruoyi.chain.split.TextSplitter;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
package org.ruoyi.chain.loader;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import org.ruoyi.chain.split.*;
|
||||
|
||||
import org.ruoyi.chain.split.CharacterTextSplitter;
|
||||
import org.ruoyi.chain.split.CodeTextSplitter;
|
||||
import org.ruoyi.chain.split.MarkdownTextSplitter;
|
||||
import org.ruoyi.constant.FileType;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@@ -12,8 +13,7 @@ public class ResourceLoaderFactory {
|
||||
private final CharacterTextSplitter characterTextSplitter;
|
||||
private final CodeTextSplitter codeTextSplitter;
|
||||
private final MarkdownTextSplitter markdownTextSplitter;
|
||||
private final TokenTextSplitter tokenTextSplitter;
|
||||
private final ExcelTextSplitter excelTextSplitter;
|
||||
|
||||
|
||||
public ResourceLoader getLoaderByFileType(String fileType){
|
||||
if (FileType.isTextFile(fileType)){
|
||||
@@ -24,8 +24,6 @@ public class ResourceLoaderFactory {
|
||||
return new PdfFileLoader(characterTextSplitter);
|
||||
} else if (FileType.isMdFile(fileType)) {
|
||||
return new MarkDownFileLoader(markdownTextSplitter);
|
||||
}else if (FileType.isExcel(fileType)) {
|
||||
return new ExcelFileLoader(excelTextSplitter);
|
||||
}else if (FileType.isCodeFile(fileType)) {
|
||||
return new CodeFileLoader(codeTextSplitter);
|
||||
}else {
|
||||
|
||||
@@ -4,7 +4,6 @@ import lombok.AllArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
||||
import org.ruoyi.chain.loader.ResourceLoader;
|
||||
import org.ruoyi.chain.split.TextSplitter;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
|
||||
@@ -3,7 +3,6 @@ package org.ruoyi.chain.split;
|
||||
import jakarta.annotation.Resource;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.ruoyi.common.core.utils.StringUtils;
|
||||
import org.ruoyi.chain.split.TextSplitter;
|
||||
import org.ruoyi.domain.vo.KnowledgeInfoVo;
|
||||
import org.ruoyi.service.IKnowledgeInfoService;
|
||||
import org.springframework.context.annotation.Lazy;
|
||||
|
||||
@@ -2,7 +2,6 @@ package org.ruoyi.chain.split;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.ruoyi.chain.split.TextSplitter;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@@ -1,17 +0,0 @@
|
||||
package org.ruoyi.chain.split;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@Component
|
||||
@AllArgsConstructor
|
||||
@Slf4j
|
||||
public class ExcelTextSplitter implements TextSplitter{
|
||||
@Override
|
||||
public List<String> split(String content, String kid) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
@@ -2,7 +2,6 @@ package org.ruoyi.chain.split;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.ruoyi.chain.split.TextSplitter;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@@ -1,18 +0,0 @@
|
||||
package org.ruoyi.constant;
|
||||
|
||||
/**
 * Integer status codes: not started, in progress, finished and processing failed.
 *
 * <p>Created: 2025/5/14 2:04 PM
 */
public class DealStatus {

    /** Not started. */
    public static final Integer STATUS_10 = Integer.valueOf(10);

    /** In progress. */
    public static final Integer STATUS_20 = Integer.valueOf(20);

    /** Finished. */
    public static final Integer STATUS_30 = Integer.valueOf(30);

    /** Processing failed. */
    public static final Integer STATUS_40 = Integer.valueOf(40);
}
|
||||
@@ -1,30 +0,0 @@
|
||||
package org.ruoyi.domain;
|
||||
|
||||
/**
 * Mutable holder that pairs a file name with the content produced for that file.
 */
public class PdfFileContentResult {

    /** Name identifying the file this result belongs to. */
    private String filename;

    /** Content associated with {@link #filename}. */
    private String content;

    /**
     * Creates a result with both values set.
     *
     * @param filename name identifying the file
     * @param content  content associated with the file
     */
    public PdfFileContentResult(String filename, String content) {
        this.content = content;
        this.filename = filename;
    }

    /** @return the file name currently stored */
    public String getFilename() {
        return this.filename;
    }

    /** @return the content currently stored */
    public String getContent() {
        return this.content;
    }

    /** @param filename new file name to store */
    public void setFilename(String filename) {
        this.filename = filename;
    }

    /** @param content new content to store */
    public void setContent(String content) {
        this.content = content;
    }
}
|
||||
@@ -1,41 +0,0 @@
|
||||
package org.ruoyi.service;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import org.ruoyi.domain.PdfFileContentResult;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
/**
|
||||
* PDF图片提取服务接口
|
||||
*/
|
||||
public interface PdfImageExtractService {
|
||||
|
||||
/**
|
||||
* 从PDF文件中提取图片
|
||||
*
|
||||
* @param pdfFile PDF文件
|
||||
* @param imageFormat 输出图片格式 (png, jpeg, gif)
|
||||
* @param allowDuplicates 是否允许重复图片
|
||||
* @return 包含提取图片的ZIP文件的字节数组
|
||||
* @throws IOException 如果文件处理过程中发生错误
|
||||
*/
|
||||
byte[] extractImages(MultipartFile pdfFile, String imageFormat, boolean allowDuplicates)
|
||||
throws IOException;
|
||||
|
||||
/**
|
||||
* 处理文件内容
|
||||
*
|
||||
* @param unzip Base64编码的图片数组
|
||||
* @return 文件内容结果列表
|
||||
* @throws IOException 如果API调用过程中发生错误
|
||||
*/
|
||||
List<PdfFileContentResult> dealFileContent(String[] unzip) throws IOException;
|
||||
|
||||
/**
|
||||
* 提取PDF中的图片并调用gpt-4o-mini,识别图片内容并返回
|
||||
* @param file
|
||||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
List<PdfFileContentResult> extractImages(MultipartFile file) throws IOException;
|
||||
}
|
||||
@@ -1,149 +0,0 @@
|
||||
package org.ruoyi.service.impl;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Data;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import okhttp3.MediaType;
|
||||
import okhttp3.MultipartBody;
|
||||
import okhttp3.OkHttpClient;
|
||||
import okhttp3.OkHttpClient.Builder;
|
||||
import okhttp3.Request;
|
||||
import okhttp3.RequestBody;
|
||||
import okhttp3.Response;
|
||||
import org.ruoyi.common.core.domain.R;
|
||||
import org.ruoyi.domain.PdfFileContentResult;
|
||||
import org.ruoyi.service.PdfImageExtractService;
|
||||
import org.ruoyi.utils.ZipUtils;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
/**
|
||||
* PDF图片提取服务实现类
|
||||
*/
|
||||
//@Service
|
||||
@Slf4j
|
||||
@Data
|
||||
@AllArgsConstructor
|
||||
//public class PdfImageExtractServiceImpl implements PdfImageExtractService {
|
||||
public class PdfImageExtractServiceImpl {
|
||||
|
||||
// @Value("${pdf.extract.service.url}")
|
||||
private String serviceUrl;
|
||||
// @Value("${pdf.extract.ai-api.url}")
|
||||
private String aiApiUrl;
|
||||
// @Value("${pdf.extract.ai-api.key}")
|
||||
private String aiApiKey;
|
||||
|
||||
private final OkHttpClient client = new Builder()
|
||||
.connectTimeout(100, TimeUnit.SECONDS)
|
||||
.readTimeout(150, TimeUnit.SECONDS)
|
||||
.writeTimeout(150, TimeUnit.SECONDS)
|
||||
.callTimeout(300, TimeUnit.SECONDS)
|
||||
.build();
|
||||
|
||||
private static final MediaType JSON = MediaType.parse("application/json; charset=utf-8");
|
||||
|
||||
// @Override
|
||||
public byte[] extractImages(MultipartFile pdfFile, String imageFormat, boolean allowDuplicates)
|
||||
throws IOException {
|
||||
// 构建multipart请求
|
||||
RequestBody requestBody = new MultipartBody.Builder()
|
||||
.setType(MultipartBody.FORM)
|
||||
.addFormDataPart("fileInput", pdfFile.getOriginalFilename(),
|
||||
RequestBody.create(MediaType.parse("application/pdf"), pdfFile.getBytes()))
|
||||
.addFormDataPart("format", imageFormat)
|
||||
.addFormDataPart("allowDuplicates", String.valueOf(allowDuplicates))
|
||||
.build();
|
||||
|
||||
// 创建请求
|
||||
Request request = new Request.Builder()
|
||||
.url(serviceUrl + "/api/v1/misc/extract-images")
|
||||
.post(requestBody)
|
||||
.build();
|
||||
|
||||
// 执行请求
|
||||
try (Response response = client.newCall(request).execute()) {
|
||||
if (!response.isSuccessful()) {
|
||||
throw new IOException("请求失败: " + response.code());
|
||||
}
|
||||
return response.body().bytes();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 处理文件内容
|
||||
*
|
||||
* @param unzip Base64编码的图片数组
|
||||
* @return 文件内容结果列表
|
||||
* @throws IOException 如果API调用过程中发生错误
|
||||
*/
|
||||
// @Override
|
||||
public List<PdfFileContentResult> dealFileContent(String[] unzip) throws IOException {
|
||||
List<PdfFileContentResult> results = new ArrayList<>();
|
||||
int i = 0;
|
||||
for (String base64Image : unzip) {
|
||||
// 构建请求JSON
|
||||
String requestJson = String.format("{"
|
||||
+ "\"model\": \"gpt-4o\","
|
||||
+ "\"stream\": false,"
|
||||
+ "\"messages\": [{"
|
||||
+ "\"role\": \"user\","
|
||||
+ "\"content\": [{"
|
||||
+ "\"type\": \"text\","
|
||||
+ "\"text\": \"这张图片有什么\""
|
||||
+ "}, {"
|
||||
+ "\"type\": \"image_url\","
|
||||
+ "\"image_url\": {"
|
||||
+ "\"url\": \"%s\""
|
||||
+ "}}"
|
||||
+ "]}],"
|
||||
+ "\"max_tokens\": 400"
|
||||
+ "}", base64Image);
|
||||
|
||||
// 创建请求
|
||||
Request request = new Request.Builder()
|
||||
.url(aiApiUrl)
|
||||
.addHeader("Authorization", "Bearer " + aiApiKey)
|
||||
.post(RequestBody.create(JSON, requestJson))
|
||||
.build();
|
||||
|
||||
// 执行请求
|
||||
try {
|
||||
log.info("=============call=" + ++i);
|
||||
|
||||
Response response = client.newCall(request).execute();
|
||||
log.info("=============response=" + response);
|
||||
if (!response.isSuccessful()) {
|
||||
throw new IOException("API请求失败: " + response.code() + response.toString());
|
||||
}
|
||||
|
||||
String responseBody = response.body().string();
|
||||
log.info("=============responseBody=" + responseBody);
|
||||
// 使用文件名(这里使用base64的前10个字符作为标识)和API返回内容创建结果对象
|
||||
String filename = base64Image.substring(0, Math.min(base64Image.length(), 10));
|
||||
results.add(new PdfFileContentResult(filename, responseBody));
|
||||
} catch (Exception e) {
|
||||
log.error(e.getMessage());
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
// @Override
|
||||
public List<PdfFileContentResult> extractImages(MultipartFile file) throws IOException {
|
||||
String format = "png";
|
||||
boolean allowDuplicates = true;
|
||||
// 获取ZIP数据
|
||||
byte[] zipData = this.extractImages(file, format, allowDuplicates);
|
||||
// 解压文件并识别图片内容并返回
|
||||
String[] unzip = ZipUtils.unzipForBase64(zipData);
|
||||
//解析图片内容
|
||||
return this.dealFileContent(unzip);
|
||||
}
|
||||
}
|
||||
@@ -11,9 +11,6 @@ import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
|
||||
import dev.langchain4j.store.embedding.EmbeddingStore;
|
||||
import dev.langchain4j.store.embedding.filter.Filter;
|
||||
import dev.langchain4j.store.embedding.filter.comparison.IsEqualTo;
|
||||
import dev.langchain4j.store.embedding.inmemory.InMemoryEmbeddingStore;
|
||||
import dev.langchain4j.store.embedding.milvus.MilvusEmbeddingStore;
|
||||
import dev.langchain4j.store.embedding.qdrant.QdrantEmbeddingStore;
|
||||
import dev.langchain4j.store.embedding.weaviate.WeaviateEmbeddingStore;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.SneakyThrows;
|
||||
@@ -23,16 +20,14 @@ import org.ruoyi.domain.bo.QueryVectorBo;
|
||||
import org.ruoyi.domain.bo.StoreEmbeddingBo;
|
||||
import org.ruoyi.service.VectorStoreService;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
import static dev.langchain4j.model.openai.OpenAiEmbeddingModelName.TEXT_EMBEDDING_3_SMALL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* 向量库管理
|
||||
*
|
||||
* @author ageer
|
||||
*/
|
||||
@Service
|
||||
@@ -45,51 +40,23 @@ public class VectorStoreServiceImpl implements VectorStoreService {
|
||||
private EmbeddingStore<TextSegment> embeddingStore;
|
||||
|
||||
@Override
|
||||
public void createSchema(String kid,String modelName) {
|
||||
switch (modelName) {
|
||||
case "weaviate" -> {
|
||||
String protocol = configService.getConfigValue("weaviate", "protocol");
|
||||
String host = configService.getConfigValue("weaviate", "host");
|
||||
String className = configService.getConfigValue("weaviate", "classname");
|
||||
embeddingStore = WeaviateEmbeddingStore.builder()
|
||||
.scheme(protocol)
|
||||
.host(host)
|
||||
.objectClass(className + kid)
|
||||
.scheme(protocol)
|
||||
.avoidDups(true)
|
||||
.consistencyLevel("ALL")
|
||||
.build();
|
||||
}
|
||||
case "milvus" -> {
|
||||
String uri = configService.getConfigValue("milvus", "host");
|
||||
String collection = configService.getConfigValue("milvus", "collection");
|
||||
String dimension = configService.getConfigValue("milvus", "dimension");
|
||||
embeddingStore = MilvusEmbeddingStore.builder()
|
||||
.uri(uri)
|
||||
.collectionName(collection + kid)
|
||||
.dimension(Integer.parseInt(dimension))
|
||||
.build();
|
||||
}
|
||||
case "qdrant" -> {
|
||||
String host = configService.getConfigValue("qdrant", "host");
|
||||
String port = configService.getConfigValue("qdrant", "port");
|
||||
String collectionName = configService.getConfigValue("qdrant", "collectionName");
|
||||
embeddingStore = QdrantEmbeddingStore.builder()
|
||||
.host(host)
|
||||
.port(Integer.parseInt(port))
|
||||
.collectionName(collectionName)
|
||||
.build();
|
||||
}
|
||||
default -> {
|
||||
//使用内存
|
||||
embeddingStore = new InMemoryEmbeddingStore<>();
|
||||
}
|
||||
}
|
||||
public void createSchema(String kid, String modelName) {
|
||||
String protocol = configService.getConfigValue("weaviate", "protocol");
|
||||
String host = configService.getConfigValue("weaviate", "host");
|
||||
String className = configService.getConfigValue("weaviate", "classname");
|
||||
embeddingStore = WeaviateEmbeddingStore.builder()
|
||||
.scheme(protocol)
|
||||
.host(host)
|
||||
.objectClass(className + kid)
|
||||
.scheme(protocol)
|
||||
.avoidDups(true)
|
||||
.consistencyLevel("ALL")
|
||||
.build();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void storeEmbeddings(StoreEmbeddingBo storeEmbeddingBo) {
|
||||
createSchema(storeEmbeddingBo.getKid(),storeEmbeddingBo.getVectorModelName());
|
||||
createSchema(storeEmbeddingBo.getKid(), storeEmbeddingBo.getVectorModelName());
|
||||
EmbeddingModel embeddingModel = getEmbeddingModel(storeEmbeddingBo.getEmbeddingModelName(),
|
||||
storeEmbeddingBo.getApiKey(), storeEmbeddingBo.getBaseUrl());
|
||||
List<String> chunkList = storeEmbeddingBo.getChunkList();
|
||||
@@ -101,22 +68,22 @@ public class VectorStoreServiceImpl implements VectorStoreService {
|
||||
Embedding embedding = embeddingModel.embed(chunkList.get(i)).content();
|
||||
TextSegment segment = TextSegment.from(chunkList.get(i));
|
||||
segment.metadata().putAll(dataSchema);
|
||||
embeddingStore.add(embedding,segment);
|
||||
embeddingStore.add(embedding, segment);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<String> getQueryVector(QueryVectorBo queryVectorBo) {
|
||||
createSchema(queryVectorBo.getKid(),queryVectorBo.getVectorModelName());
|
||||
createSchema(queryVectorBo.getKid(), queryVectorBo.getVectorModelName());
|
||||
EmbeddingModel embeddingModel = getEmbeddingModel(queryVectorBo.getEmbeddingModelName(),
|
||||
queryVectorBo.getApiKey(), queryVectorBo.getBaseUrl());
|
||||
// Filter simpleFilter = new IsEqualTo("kid", queryVectorBo.getKid());
|
||||
// Filter simpleFilter = new IsEqualTo("kid", queryVectorBo.getKid());
|
||||
Embedding queryEmbedding = embeddingModel.embed(queryVectorBo.getQuery()).content();
|
||||
EmbeddingSearchRequest embeddingSearchRequest = EmbeddingSearchRequest.builder()
|
||||
.queryEmbedding(queryEmbedding)
|
||||
.maxResults(queryVectorBo.getMaxResults())
|
||||
// 添加过滤条件
|
||||
// .filter(simpleFilter)
|
||||
// .filter(simpleFilter)
|
||||
.build();
|
||||
List<EmbeddingMatch<TextSegment>> matches = embeddingStore.search(embeddingSearchRequest).matches();
|
||||
List<String> results = new ArrayList<>();
|
||||
@@ -126,24 +93,24 @@ public class VectorStoreServiceImpl implements VectorStoreService {
|
||||
|
||||
|
||||
@Override
|
||||
public void removeByKid(String kid,String modelName) {
|
||||
createSchema(kid,modelName);
|
||||
public void removeByKid(String kid, String modelName) {
|
||||
createSchema(kid, modelName);
|
||||
// 根据条件删除向量数据
|
||||
Filter simpleFilter = new IsEqualTo("kid", kid);
|
||||
embeddingStore.removeAll(simpleFilter);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void removeByDocId(String kid, String docId,String modelName) {
|
||||
createSchema(kid,modelName);
|
||||
public void removeByDocId(String kid, String docId, String modelName) {
|
||||
createSchema(kid, modelName);
|
||||
// 根据条件删除向量数据
|
||||
Filter simpleFilterByDocId = new IsEqualTo("docId", docId);
|
||||
embeddingStore.removeAll(simpleFilterByDocId);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void removeByKidAndFid(String kid, String fid,String modelName) {
|
||||
createSchema(kid,modelName);
|
||||
public void removeByKidAndFid(String kid, String fid, String modelName) {
|
||||
createSchema(kid, modelName);
|
||||
// 根据条件删除向量数据
|
||||
Filter simpleFilterByKid = new IsEqualTo("kid", kid);
|
||||
Filter simpleFilterFid = new IsEqualTo("fid", fid);
|
||||
@@ -157,25 +124,18 @@ public class VectorStoreServiceImpl implements VectorStoreService {
|
||||
@SneakyThrows
|
||||
public EmbeddingModel getEmbeddingModel(String modelName, String apiKey, String baseUrl) {
|
||||
EmbeddingModel embeddingModel;
|
||||
if(TEXT_EMBEDDING_3_SMALL.toString().equals(modelName)) {
|
||||
embeddingModel = OpenAiEmbeddingModel.builder()
|
||||
.apiKey(apiKey)
|
||||
.baseUrl(baseUrl)
|
||||
.modelName(modelName)
|
||||
.build();
|
||||
// TODO 添加枚举
|
||||
}else if("quentinz/bge-large-zh-v1.5".equals(modelName)) {
|
||||
if ("quentinz/bge-large-zh-v1.5".equals(modelName)) {
|
||||
embeddingModel = OllamaEmbeddingModel.builder()
|
||||
.baseUrl(baseUrl)
|
||||
.modelName(modelName)
|
||||
.build();
|
||||
}else if("baai/bge-m3".equals(modelName)) {
|
||||
} else if ("baai/bge-m3".equals(modelName)) {
|
||||
embeddingModel = OpenAiEmbeddingModel.builder()
|
||||
.apiKey(apiKey)
|
||||
.baseUrl(baseUrl)
|
||||
.modelName(modelName)
|
||||
.build();
|
||||
}else {
|
||||
} else {
|
||||
throw new ServiceException("未找到对应向量化模型!");
|
||||
}
|
||||
return embeddingModel;
|
||||
|
||||
@@ -1,183 +0,0 @@
|
||||
package org.ruoyi.utils;
|
||||
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Base64;
|
||||
import java.util.List;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipInputStream;
|
||||
import org.springframework.mock.web.MockMultipartFile;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
/**
|
||||
* ZIP文件处理工具类
|
||||
*/
|
||||
public class ZipUtils {
|
||||
|
||||
/**
|
||||
* 解压ZIP文件到指定目录
|
||||
*
|
||||
* @param zipData ZIP文件的字节数组
|
||||
* @param destDir 目标目录
|
||||
* @return 解压后的文件路径列表
|
||||
* @throws IOException 如果解压过程中发生错误
|
||||
*/
|
||||
public static String[] unzip(byte[] zipData, String destDir) throws IOException {
|
||||
File destDirFile = new File(destDir);
|
||||
if (!destDirFile.exists()) {
|
||||
destDirFile.mkdirs();
|
||||
}
|
||||
|
||||
List<String> extractedPaths = new ArrayList<>();
|
||||
try (ByteArrayInputStream bis = new ByteArrayInputStream(zipData);
|
||||
ZipInputStream zis = new ZipInputStream(bis)) {
|
||||
|
||||
ZipEntry zipEntry;
|
||||
while ((zipEntry = zis.getNextEntry()) != null) {
|
||||
String filePath = destDir + File.separator + zipEntry.getName();
|
||||
if (!zipEntry.isDirectory()) {
|
||||
extractFile(zis, filePath);
|
||||
extractedPaths.add(filePath);
|
||||
} else {
|
||||
new File(filePath).mkdirs();
|
||||
}
|
||||
zis.closeEntry();
|
||||
}
|
||||
}
|
||||
return extractedPaths.toArray(new String[0]);
|
||||
}
|
||||
|
||||
private static void extractFile(ZipInputStream zis, String filePath) throws IOException {
|
||||
try (BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(filePath))) {
|
||||
byte[] buffer = new byte[4096];
|
||||
int read;
|
||||
while ((read = zis.read(buffer)) != -1) {
|
||||
bos.write(buffer, 0, read);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 解压ZIP文件并返回文件内容的Base64编码字符串数组
|
||||
*
|
||||
* @param zipData ZIP文件的字节数组
|
||||
* @return Base64编码的文件内容数组
|
||||
* @throws IOException 如果解压过程中发生错误
|
||||
*/
|
||||
public static String[] unzipForBase64(byte[] zipData) throws IOException {
|
||||
List<String> base64Contents = new ArrayList<>();
|
||||
try (ByteArrayInputStream bis = new ByteArrayInputStream(zipData);
|
||||
ZipInputStream zis = new ZipInputStream(bis)) {
|
||||
|
||||
ZipEntry zipEntry;
|
||||
while ((zipEntry = zis.getNextEntry()) != null) {
|
||||
if (!zipEntry.isDirectory()) {
|
||||
// 读取文件内容到内存
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
byte[] buffer = new byte[4096];
|
||||
int read;
|
||||
while ((read = zis.read(buffer)) != -1) {
|
||||
baos.write(buffer, 0, read);
|
||||
}
|
||||
|
||||
// 将文件内容转换为Base64字符串
|
||||
String base64Content = Base64.getEncoder().encodeToString(baos.toByteArray());
|
||||
base64Contents.add(base64Content);
|
||||
}
|
||||
zis.closeEntry();
|
||||
}
|
||||
}
|
||||
return base64Contents.toArray(new String[0]);
|
||||
}
|
||||
|
||||
/**
|
||||
* 解压ZIP文件并返回MultipartFile数组
|
||||
*
|
||||
* @param zipData ZIP文件的字节数组
|
||||
* @return MultipartFile数组
|
||||
* @throws IOException 如果解压过程中发生错误
|
||||
*/
|
||||
public static MultipartFile[] unzipToMultipartFiles(byte[] zipData) throws IOException {
|
||||
List<MultipartFile> multipartFiles = new ArrayList<>();
|
||||
try (ByteArrayInputStream bis = new ByteArrayInputStream(zipData);
|
||||
ZipInputStream zis = new ZipInputStream(bis)) {
|
||||
|
||||
ZipEntry zipEntry;
|
||||
while ((zipEntry = zis.getNextEntry()) != null) {
|
||||
if (!zipEntry.isDirectory()) {
|
||||
// 读取文件内容到内存
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
byte[] buffer = new byte[4096];
|
||||
int read;
|
||||
while ((read = zis.read(buffer)) != -1) {
|
||||
baos.write(buffer, 0, read);
|
||||
}
|
||||
|
||||
// 创建MultipartFile对象
|
||||
String fileName = zipEntry.getName();
|
||||
byte[] content = baos.toByteArray();
|
||||
String contentType = determineContentType(fileName);
|
||||
|
||||
MultipartFile multipartFile = new MockMultipartFile(
|
||||
fileName, // 文件名
|
||||
fileName, // 原始文件名
|
||||
contentType, // 内容类型
|
||||
content // 文件内容
|
||||
);
|
||||
|
||||
multipartFiles.add(multipartFile);
|
||||
}
|
||||
zis.closeEntry();
|
||||
}
|
||||
}
|
||||
return multipartFiles.toArray(new MultipartFile[0]);
|
||||
}
|
||||
|
||||
/**
|
||||
* 根据文件名确定内容类型
|
||||
*
|
||||
* @param fileName 文件名
|
||||
* @return 内容类型
|
||||
*/
|
||||
private static String determineContentType(String fileName) {
|
||||
String extension = "";
|
||||
int i = fileName.lastIndexOf('.');
|
||||
if (i > 0) {
|
||||
extension = fileName.substring(i + 1).toLowerCase();
|
||||
}
|
||||
|
||||
switch (extension) {
|
||||
case "txt":
|
||||
return "text/plain";
|
||||
case "html":
|
||||
case "htm":
|
||||
return "text/html";
|
||||
case "pdf":
|
||||
return "application/pdf";
|
||||
case "jpg":
|
||||
case "jpeg":
|
||||
return "image/jpeg";
|
||||
case "png":
|
||||
return "image/png";
|
||||
case "gif":
|
||||
return "image/gif";
|
||||
case "doc":
|
||||
case "docx":
|
||||
return "application/msword";
|
||||
case "xls":
|
||||
case "xlsx":
|
||||
return "application/vnd.ms-excel";
|
||||
case "xml":
|
||||
return "application/xml";
|
||||
case "json":
|
||||
return "application/json";
|
||||
default:
|
||||
return "application/octet-stream";
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user