mirror of
https://gitcode.com/ageerle/ruoyi-ai.git
synced 2026-03-14 05:03:43 +08:00
feat: 知识库上传逻辑调整
This commit is contained in:
@@ -1,13 +1,11 @@
|
||||
package org.ruoyi.service.impl;
|
||||
|
||||
import cn.hutool.core.util.RandomUtil;
|
||||
import com.google.protobuf.ServiceException;
|
||||
import dev.langchain4j.data.embedding.Embedding;
|
||||
import dev.langchain4j.data.segment.TextSegment;
|
||||
import dev.langchain4j.model.embedding.EmbeddingModel;
|
||||
import dev.langchain4j.model.ollama.OllamaEmbeddingModel;
|
||||
import dev.langchain4j.model.openai.OpenAiEmbeddingModel;
|
||||
import dev.langchain4j.model.output.Response;
|
||||
import dev.langchain4j.store.embedding.EmbeddingMatch;
|
||||
import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
|
||||
import dev.langchain4j.store.embedding.EmbeddingStore;
|
||||
@@ -31,6 +29,7 @@ import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
|
||||
* 向量库管理
|
||||
@@ -131,31 +130,7 @@ public class VectorStoreServiceImpl implements VectorStoreService {
|
||||
createSchema(kid,modelName);
|
||||
// 根据条件删除向量数据
|
||||
Filter simpleFilter = new IsEqualTo("kid", kid);
|
||||
removeByFilter(simpleFilter);
|
||||
}
|
||||
|
||||
public void removeByFilter(Filter filter) {
|
||||
List<Float> dummyVector = new ArrayList<>();
|
||||
// TODO 模型维度
|
||||
int dimension = 1024;
|
||||
for (int i = 0; i < dimension; i++) {
|
||||
dummyVector.add(0.0f);
|
||||
}
|
||||
Embedding dummyEmbedding = Embedding.from(dummyVector);
|
||||
EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
|
||||
.queryEmbedding(dummyEmbedding)
|
||||
.filter(filter)
|
||||
.maxResults(10000)
|
||||
.build();
|
||||
// 搜索
|
||||
List<String> idsToDelete = embeddingStore.search(request)
|
||||
.matches().stream()
|
||||
.map(EmbeddingMatch::embeddingId)
|
||||
.collect(Collectors.toList());
|
||||
// 删除
|
||||
if (!idsToDelete.isEmpty()) {
|
||||
embeddingStore.removeAll(idsToDelete);
|
||||
}
|
||||
embeddingStore.removeAll(simpleFilter);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
||||
@@ -1,385 +0,0 @@
|
||||
package org.ruoyi.chat.service.knowledge;
|
||||
|
||||
import cn.hutool.core.util.ObjectUtil;
|
||||
import cn.hutool.core.util.RandomUtil;
|
||||
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
|
||||
import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper;
|
||||
import com.baomidou.mybatisplus.core.toolkit.Wrappers;
|
||||
import java.util.Collection;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.ruoyi.chain.loader.ResourceLoaderFactory;
|
||||
import org.ruoyi.constant.DealStatus;
|
||||
import org.ruoyi.domain.KnowledgeAttach;
|
||||
import org.ruoyi.domain.KnowledgeAttachPic;
|
||||
import org.ruoyi.domain.KnowledgeFragment;
|
||||
import org.ruoyi.domain.KnowledgeInfo;
|
||||
import org.ruoyi.domain.PdfFileContentResult;
|
||||
import org.ruoyi.domain.bo.StoreEmbeddingBo;
|
||||
import org.ruoyi.domain.vo.ChatModelVo;
|
||||
import org.ruoyi.domain.vo.KnowledgeAttachVo;
|
||||
import org.ruoyi.domain.vo.KnowledgeInfoVo;
|
||||
import org.ruoyi.mapper.KnowledgeAttachMapper;
|
||||
import org.ruoyi.mapper.KnowledgeAttachPicMapper;
|
||||
import org.ruoyi.mapper.KnowledgeFragmentMapper;
|
||||
import org.ruoyi.mapper.KnowledgeInfoMapper;
|
||||
import org.ruoyi.service.IChatModelService;
|
||||
import org.ruoyi.service.VectorStoreService;
|
||||
import org.ruoyi.service.impl.PdfImageExtractServiceImpl;
|
||||
import org.ruoyi.system.domain.vo.SysOssVo;
|
||||
import org.ruoyi.system.service.ISysOssService;
|
||||
import org.ruoyi.utils.ZipUtils;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.scheduling.annotation.Async;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
|
||||
import com.fasterxml.jackson.databind.JsonNode;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
|
||||
/**
|
||||
* @Description:
|
||||
* @Date: 2025/5/15 下午4:29
|
||||
*/
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class DealFileService {
|
||||
private static final Logger log = LoggerFactory.getLogger(DealFileService.class);
|
||||
|
||||
private final KnowledgeInfoMapper baseMapper;
|
||||
|
||||
private final VectorStoreService vectorStoreService;
|
||||
|
||||
private final ResourceLoaderFactory resourceLoaderFactory;
|
||||
|
||||
private final KnowledgeFragmentMapper fragmentMapper;
|
||||
|
||||
private final KnowledgeAttachMapper attachMapper;
|
||||
|
||||
private final IChatModelService chatModelService;
|
||||
|
||||
private final ISysOssService ossService;
|
||||
|
||||
// private final PdfImageExtractService pdfImageExtractService;
|
||||
|
||||
private final KnowledgeAttachPicMapper picMapper;
|
||||
|
||||
@Value("${pdf.extract.service.url}")
|
||||
private String serviceUrl;
|
||||
@Value("${pdf.extract.ai-api.url}")
|
||||
private String aiApiUrl;
|
||||
@Value("${pdf.extract.ai-api.key}")
|
||||
private String aiApiKey;
|
||||
|
||||
|
||||
@Async
|
||||
public void dealVectorStatus(KnowledgeAttach attachItem) throws Exception {
|
||||
try {
|
||||
//锁定数据 更改VectorStatus 到进行中
|
||||
if (attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
|
||||
.set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_20)
|
||||
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
|
||||
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
|
||||
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttach::getId, attachItem.getId())
|
||||
) == 0) {
|
||||
return;
|
||||
}
|
||||
List<KnowledgeFragment> knowledgeFragments = fragmentMapper.selectList(
|
||||
new LambdaQueryWrapper<KnowledgeFragment>()
|
||||
.eq(KnowledgeFragment::getKid, attachItem.getKid())
|
||||
.eq(KnowledgeFragment::getDocId, attachItem.getDocId())
|
||||
);
|
||||
if (ObjectUtil.isEmpty(knowledgeFragments)) {
|
||||
throw new Exception("文件段落为空");
|
||||
}
|
||||
List<String> fids = knowledgeFragments.stream()
|
||||
.map(KnowledgeFragment::getFid)
|
||||
.collect(Collectors.toList());
|
||||
if (ObjectUtil.isEmpty(fids)) {
|
||||
throw new Exception("fids 为空");
|
||||
}
|
||||
List<String> chunkList = knowledgeFragments.stream()
|
||||
.map(KnowledgeFragment::getContent)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
if (ObjectUtil.isEmpty(chunkList)) {
|
||||
throw new Exception("chunkList 为空");
|
||||
}
|
||||
// 通过kid查询知识库信息
|
||||
KnowledgeInfoVo knowledgeInfoVo = baseMapper.selectVoOne(Wrappers.<KnowledgeInfo>lambdaQuery()
|
||||
.eq(KnowledgeInfo::getId, attachItem.getKid()));
|
||||
// 通过向量模型查询模型信息
|
||||
ChatModelVo chatModelVo = chatModelService.selectModelByName(
|
||||
knowledgeInfoVo.getEmbeddingModelName());
|
||||
|
||||
StoreEmbeddingBo storeEmbeddingBo = new StoreEmbeddingBo();
|
||||
storeEmbeddingBo.setKid(attachItem.getKid());
|
||||
storeEmbeddingBo.setDocId(attachItem.getDocId());
|
||||
storeEmbeddingBo.setFids(fids);
|
||||
storeEmbeddingBo.setChunkList(chunkList);
|
||||
storeEmbeddingBo.setVectorModelName(knowledgeInfoVo.getVectorModelName());
|
||||
storeEmbeddingBo.setEmbeddingModelName(knowledgeInfoVo.getEmbeddingModelName());
|
||||
storeEmbeddingBo.setApiKey(chatModelVo.getApiKey());
|
||||
storeEmbeddingBo.setBaseUrl(chatModelVo.getApiHost());
|
||||
vectorStoreService.storeEmbeddings(storeEmbeddingBo);
|
||||
|
||||
//设置处理完成
|
||||
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
|
||||
.set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_30)
|
||||
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
|
||||
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
|
||||
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_20)
|
||||
.eq(KnowledgeAttach::getId, attachItem.getId()));
|
||||
} catch (Exception e) {
|
||||
//设置处理失败
|
||||
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
|
||||
.set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_40)
|
||||
.set(KnowledgeAttach::getRemark, attachItem.getRemark() + e.getMessage())
|
||||
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
|
||||
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
|
||||
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_20)
|
||||
.eq(KnowledgeAttach::getId, attachItem.getId()));
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@Async
|
||||
public void dealPicStatus(KnowledgeAttach attachItem) throws Exception {
|
||||
try {
|
||||
//锁定数据 更改picStatus 到进行中
|
||||
if (attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
|
||||
.set(KnowledgeAttach::getPicStatus, DealStatus.STATUS_20)
|
||||
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttach::getId, attachItem.getId())
|
||||
) == 0) {
|
||||
return;
|
||||
}
|
||||
//获取附件
|
||||
if (ObjectUtil.isEmpty(attachItem.getOssId())) {
|
||||
log.error("==========OssId 为空,attachItem={}", attachItem);
|
||||
throw new Exception("OssId 为空");
|
||||
}
|
||||
//获取oss文件
|
||||
MultipartFile multipartFile = ossService.downloadByFile(attachItem.getOssId());
|
||||
//拆解出图片ZIP
|
||||
PdfImageExtractServiceImpl pdfImageExtractService = new PdfImageExtractServiceImpl(serviceUrl,
|
||||
aiApiUrl, aiApiKey);
|
||||
byte[] pngs = pdfImageExtractService.extractImages(multipartFile, "png", true);
|
||||
//解压zip,得到图片文件
|
||||
MultipartFile[] multipartFiles = ZipUtils.unzipToMultipartFiles(pngs);
|
||||
//上传文件到OSS,写入表
|
||||
for (MultipartFile file : multipartFiles) {
|
||||
//先查找是否有相同图片名称,先做删除
|
||||
List<KnowledgeAttachPic> knowledgeAttachPics = picMapper.selectList(
|
||||
new LambdaQueryWrapper<KnowledgeAttachPic>()
|
||||
.eq(KnowledgeAttachPic::getKid, attachItem.getKid())
|
||||
.eq(KnowledgeAttachPic::getAid, attachItem.getId())
|
||||
.eq(KnowledgeAttachPic::getDocName, file.getOriginalFilename())
|
||||
);
|
||||
if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) {
|
||||
Collection<Long> ossIds = knowledgeAttachPics.stream()
|
||||
.map(KnowledgeAttachPic::getOssId)
|
||||
.collect(Collectors.toList());
|
||||
ossService.deleteWithValidByIds(ossIds, false);
|
||||
List<Long> collect = knowledgeAttachPics.stream().map(KnowledgeAttachPic::getId)
|
||||
.collect(Collectors.toList());
|
||||
picMapper.deleteByIds(collect);
|
||||
}
|
||||
|
||||
SysOssVo upload = ossService.upload(file);
|
||||
KnowledgeAttachPic entity = new KnowledgeAttachPic();
|
||||
entity.setKid(attachItem.getKid());
|
||||
entity.setAid(String.valueOf(attachItem.getId()));
|
||||
entity.setDocName(file.getOriginalFilename());
|
||||
entity.setDocType(
|
||||
file.getOriginalFilename().substring(file.getOriginalFilename().lastIndexOf(".") + 1));
|
||||
entity.setOssId(upload.getOssId());
|
||||
int[] ints = extractPageNumbers(file.getOriginalFilename());
|
||||
if (ObjectUtil.isNotEmpty(ints)) {
|
||||
assert ints != null;
|
||||
if (ints.length == 2) {
|
||||
entity.setPageNum(ints[0]);
|
||||
entity.setIndexNum(ints[1]);
|
||||
}
|
||||
}
|
||||
picMapper.insert(entity);
|
||||
}
|
||||
|
||||
//设置处理完成
|
||||
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
|
||||
.set(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
|
||||
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_20)
|
||||
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttach::getId, attachItem.getId()));
|
||||
} catch (Exception e) {
|
||||
//设置处理失败
|
||||
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
|
||||
.set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_40)
|
||||
.set(KnowledgeAttach::getRemark, attachItem.getRemark() + e.getMessage())
|
||||
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_20)
|
||||
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttach::getId, attachItem.getId()));
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@Async
|
||||
public void dealPicAnysStatus(KnowledgeAttachPic picItem) throws Exception {
|
||||
try {
|
||||
//锁定数据 更改 getPicAnysStatus 到进行中
|
||||
if (picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>()
|
||||
.set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_20)
|
||||
.eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttachPic::getId, picItem.getId())
|
||||
) == 0) {
|
||||
return;
|
||||
}
|
||||
SysOssVo ossVo = ossService.getById(picItem.getOssId());
|
||||
if (ObjectUtil.isNotEmpty(ossVo)) {
|
||||
String fileStr = ossService.downloadByByte(picItem.getOssId());
|
||||
//调用第三方 分析图片内容
|
||||
PdfImageExtractServiceImpl pdfImageExtractService = new PdfImageExtractServiceImpl(
|
||||
serviceUrl,
|
||||
aiApiUrl, aiApiKey);
|
||||
List<PdfFileContentResult> pdfFileContentResults = pdfImageExtractService.dealFileContent(
|
||||
new String[]{fileStr});
|
||||
if (ObjectUtil.isNotEmpty(pdfFileContentResults)) {
|
||||
for (PdfFileContentResult resultItem : pdfFileContentResults) {
|
||||
//图片解析内容回写到pic表
|
||||
picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>()
|
||||
.set(KnowledgeAttachPic::getContent, parseContent(resultItem.getContent()))
|
||||
.set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_30)
|
||||
.eq(KnowledgeAttachPic::getId, picItem.getId()));
|
||||
//将图片解析内容 写入段落表 fragment
|
||||
KnowledgeAttachVo knowledgeAttachVo = attachMapper.selectVoById(picItem.getAid());
|
||||
if (ObjectUtil.isNotEmpty(knowledgeAttachVo)) {
|
||||
String fid = RandomUtil.randomString(10);
|
||||
KnowledgeFragment knowledgeFragment = new KnowledgeFragment();
|
||||
knowledgeFragment.setKid(knowledgeAttachVo.getKid());
|
||||
knowledgeFragment.setDocId(knowledgeAttachVo.getDocId());
|
||||
knowledgeFragment.setFid(fid);
|
||||
knowledgeFragment.setIdx(0);
|
||||
knowledgeFragment.setContent(parseContent(resultItem.getContent()));
|
||||
knowledgeFragment.setCreateTime(new Date());
|
||||
fragmentMapper.insert(knowledgeFragment);
|
||||
|
||||
//更新attach表,需要所有图片都处理完毕
|
||||
// 查询非30状态(完成状态)的记录数量
|
||||
long nonStatus30Count = picMapper.selectCount(
|
||||
new LambdaQueryWrapper<KnowledgeAttachPic>()
|
||||
.ne(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_30)
|
||||
.eq(KnowledgeAttachPic::getAid, picItem.getAid())
|
||||
);
|
||||
if (nonStatus30Count == 0) {
|
||||
// 执行表更新操作
|
||||
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
|
||||
.set(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
|
||||
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
|
||||
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttach::getId, picItem.getAid()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
//失败
|
||||
picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>()
|
||||
.set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_40)
|
||||
.set(KnowledgeAttachPic::getRemark, picItem.getRemark() + e.getMessage())
|
||||
.eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_20)
|
||||
.eq(KnowledgeAttachPic::getId, picItem.getId()));
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* 从文件名中提取page后面的两个数字
|
||||
*
|
||||
* @param fileName 文件名
|
||||
* @return 包含两个数字的数组,如果未找到则返回null
|
||||
*/
|
||||
public static int[] extractPageNumbers(String fileName) {
|
||||
// 查找"page_"的位置
|
||||
int pageIndex = fileName.indexOf("page_");
|
||||
|
||||
if (pageIndex == -1) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// 从"page_"后开始截取
|
||||
String afterPage = fileName.substring(pageIndex + 5);
|
||||
|
||||
// 按下划线分割
|
||||
String[] parts = afterPage.split("_");
|
||||
|
||||
if (parts.length >= 2) {
|
||||
try {
|
||||
// 提取两个数字
|
||||
int firstNumber = Integer.parseInt(parts[0]);
|
||||
|
||||
// 对于第二个数字,需要去掉可能的文件扩展名
|
||||
String secondPart = parts[1];
|
||||
int dotIndex = secondPart.indexOf(".");
|
||||
if (dotIndex != -1) {
|
||||
secondPart = secondPart.substring(0, dotIndex);
|
||||
}
|
||||
|
||||
int secondNumber = Integer.parseInt(secondPart);
|
||||
|
||||
return new int[]{firstNumber, secondNumber};
|
||||
} catch (NumberFormatException e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public static String parseContent(String jsonString) {
|
||||
try {
|
||||
// 创建ObjectMapper实例
|
||||
ObjectMapper objectMapper = new ObjectMapper();
|
||||
|
||||
// 解析JSON字符串
|
||||
JsonNode rootNode = objectMapper.readTree(jsonString);
|
||||
|
||||
// 获取choices数组的第一个元素
|
||||
JsonNode choicesNode = rootNode.get("choices");
|
||||
if (choicesNode != null && choicesNode.isArray() && choicesNode.size() > 0) {
|
||||
// 获取第一个choice
|
||||
JsonNode firstChoice = choicesNode.get(0);
|
||||
|
||||
// 获取message节点
|
||||
JsonNode messageNode = firstChoice.get("message");
|
||||
if (messageNode != null) {
|
||||
// 获取content字段的值
|
||||
JsonNode contentNode = messageNode.get("content");
|
||||
if (contentNode != null) {
|
||||
return contentNode.asText();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return "无法找到content内容";
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
return "解析JSON时发生错误: " + e.getMessage();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
@@ -4,29 +4,21 @@ import cn.hutool.core.collection.CollUtil;
|
||||
import cn.hutool.core.util.ObjectUtil;
|
||||
import cn.hutool.core.util.RandomUtil;
|
||||
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
|
||||
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
|
||||
import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper;
|
||||
import com.baomidou.mybatisplus.core.toolkit.Wrappers;
|
||||
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.ruoyi.chain.loader.ResourceLoader;
|
||||
import org.ruoyi.chain.loader.ResourceLoaderFactory;
|
||||
import org.ruoyi.common.core.domain.model.LoginUser;
|
||||
import org.ruoyi.common.core.utils.MapstructUtils;
|
||||
import org.ruoyi.common.core.utils.StringUtils;
|
||||
import org.ruoyi.common.satoken.utils.LoginHelper;
|
||||
import org.ruoyi.constant.DealStatus;
|
||||
import org.ruoyi.constant.FileType;
|
||||
import org.ruoyi.core.page.PageQuery;
|
||||
import org.ruoyi.core.page.TableDataInfo;
|
||||
import org.ruoyi.domain.ChatModel;
|
||||
import org.ruoyi.domain.KnowledgeAttach;
|
||||
import org.ruoyi.domain.KnowledgeAttachPic;
|
||||
import org.ruoyi.domain.KnowledgeFragment;
|
||||
import org.ruoyi.domain.KnowledgeInfo;
|
||||
import org.ruoyi.domain.PdfFileContentResult;
|
||||
import org.ruoyi.domain.bo.KnowledgeInfoBo;
|
||||
import org.ruoyi.domain.bo.KnowledgeInfoUploadBo;
|
||||
import org.ruoyi.domain.bo.StoreEmbeddingBo;
|
||||
@@ -34,28 +26,21 @@ import org.ruoyi.domain.vo.ChatModelVo;
|
||||
import org.ruoyi.domain.vo.KnowledgeAttachVo;
|
||||
import org.ruoyi.domain.vo.KnowledgeInfoVo;
|
||||
import org.ruoyi.mapper.KnowledgeAttachMapper;
|
||||
import org.ruoyi.mapper.KnowledgeAttachPicMapper;
|
||||
import org.ruoyi.mapper.KnowledgeFragmentMapper;
|
||||
import org.ruoyi.mapper.KnowledgeInfoMapper;
|
||||
import org.ruoyi.service.IChatModelService;
|
||||
import org.ruoyi.service.PdfImageExtractService;
|
||||
import org.ruoyi.service.VectorStoreService;
|
||||
import org.ruoyi.service.IKnowledgeInfoService;
|
||||
import org.ruoyi.service.impl.PdfImageExtractServiceImpl;
|
||||
import org.ruoyi.system.domain.vo.SysOssVo;
|
||||
import org.ruoyi.utils.ZipUtils;
|
||||
import org.ruoyi.service.VectorStoreService;
|
||||
import org.ruoyi.system.service.ISysOssService;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.scheduling.annotation.Async;
|
||||
import org.springframework.scheduling.annotation.Scheduled;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
import org.ruoyi.system.service.ISysOssService;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
|
||||
/**
|
||||
@@ -69,6 +54,7 @@ import java.util.*;
|
||||
public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(KnowledgeInfoServiceImpl.class);
|
||||
|
||||
private final KnowledgeInfoMapper baseMapper;
|
||||
|
||||
private final VectorStoreService vectorStoreService;
|
||||
@@ -83,19 +69,6 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
|
||||
private final ISysOssService ossService;
|
||||
|
||||
// private final PdfImageExtractService pdfImageExtractService;
|
||||
|
||||
private final KnowledgeAttachPicMapper picMapper;
|
||||
|
||||
private final DealFileService dealFileService;
|
||||
|
||||
@Value("${pdf.extract.service.url}")
|
||||
private String serviceUrl;
|
||||
@Value("${pdf.extract.ai-api.url}")
|
||||
private String aiApiUrl;
|
||||
@Value("${pdf.extract.ai-api.key}")
|
||||
private String aiApiKey;
|
||||
|
||||
/**
|
||||
* 查询知识库
|
||||
*/
|
||||
@@ -207,45 +180,16 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
@Override
|
||||
@Transactional(rollbackFor = Exception.class)
|
||||
public void removeKnowledge(String id) {
|
||||
Map<String, Object> map = new HashMap<>();
|
||||
map.put("kid", id);
|
||||
Map<String,Object> map = new HashMap<>();
|
||||
map.put("kid",id);
|
||||
List<KnowledgeInfoVo> knowledgeInfoList = baseMapper.selectVoByMap(map);
|
||||
check(knowledgeInfoList);
|
||||
// 删除向量库信息
|
||||
knowledgeInfoList.forEach(knowledgeInfoVo -> {
|
||||
vectorStoreService.removeByKid(String.valueOf(knowledgeInfoVo.getId()),
|
||||
knowledgeInfoVo.getVectorModelName());
|
||||
});
|
||||
// knowledgeInfoList.forEach(knowledgeInfoVo -> {
|
||||
// vectorStoreService.removeByKid(String.valueOf(knowledgeInfoVo.getId()),knowledgeInfoVo.getVectorModelName());
|
||||
// });
|
||||
// 删除附件和知识片段
|
||||
fragmentMapper.deleteByMap(map);
|
||||
List<KnowledgeAttachVo> knowledgeAttachVos = attachMapper.selectVoByMap(map);
|
||||
if (ObjectUtil.isNotEmpty(knowledgeAttachVos)) {
|
||||
Collection<Long> ossIds = knowledgeAttachVos.stream()
|
||||
.map(KnowledgeAttachVo::getOssId)
|
||||
.collect(Collectors.toList());
|
||||
//删除oss
|
||||
ossService.deleteWithValidByIds(ossIds, false);
|
||||
|
||||
//删除图片oss
|
||||
List<KnowledgeAttachPic> knowledgeAttachPics = picMapper.selectList(
|
||||
new LambdaQueryWrapper<KnowledgeAttachPic>()
|
||||
.in(KnowledgeAttachPic::getKid,
|
||||
knowledgeAttachVos.stream().map(KnowledgeAttachVo::getKid)
|
||||
.collect(Collectors.toList()))
|
||||
.in(KnowledgeAttachPic::getAid,
|
||||
knowledgeAttachVos.stream().map(KnowledgeAttachVo::getId)
|
||||
.collect(Collectors.toList()))
|
||||
);
|
||||
if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) {
|
||||
Collection<Long> tossIds = knowledgeAttachPics.stream()
|
||||
.map(KnowledgeAttachPic::getOssId)
|
||||
.collect(Collectors.toList());
|
||||
ossService.deleteWithValidByIds(tossIds, false);
|
||||
List<Long> collect = knowledgeAttachPics.stream().map(KnowledgeAttachPic::getId)
|
||||
.collect(Collectors.toList());
|
||||
picMapper.deleteByIds(collect);
|
||||
}
|
||||
}
|
||||
attachMapper.deleteByMap(map);
|
||||
// 删除知识库
|
||||
baseMapper.deleteByMap(map);
|
||||
@@ -257,11 +201,6 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
}
|
||||
|
||||
public void storeContent(MultipartFile file, String kid) {
|
||||
if (file == null || file.isEmpty()) {
|
||||
throw new IllegalArgumentException("File cannot be null or empty");
|
||||
}
|
||||
|
||||
SysOssVo uploadDto = null;
|
||||
String fileName = file.getOriginalFilename();
|
||||
List<String> chunkList = new ArrayList<>();
|
||||
KnowledgeAttach knowledgeAttach = new KnowledgeAttach();
|
||||
@@ -269,18 +208,15 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
String docId = RandomUtil.randomString(10);
|
||||
knowledgeAttach.setDocId(docId);
|
||||
knowledgeAttach.setDocName(fileName);
|
||||
knowledgeAttach.setDocType(fileName.substring(fileName.lastIndexOf(".") + 1));
|
||||
knowledgeAttach.setDocType(fileName.substring(fileName.lastIndexOf(".")+1));
|
||||
String content = "";
|
||||
ResourceLoader resourceLoader = resourceLoaderFactory.getLoaderByFileType(
|
||||
knowledgeAttach.getDocType());
|
||||
ResourceLoader resourceLoader = resourceLoaderFactory.getLoaderByFileType(knowledgeAttach.getDocType());
|
||||
List<String> fids = new ArrayList<>();
|
||||
try {
|
||||
content = resourceLoader.getContent(file.getInputStream());
|
||||
chunkList = resourceLoader.getChunkList(content, kid);
|
||||
List<KnowledgeFragment> knowledgeFragmentList = new ArrayList<>();
|
||||
if (CollUtil.isNotEmpty(chunkList)) {
|
||||
// Upload file to OSS
|
||||
uploadDto = ossService.upload(file);
|
||||
for (int i = 0; i < chunkList.size(); i++) {
|
||||
String fid = RandomUtil.randomString(10);
|
||||
fids.add(fid);
|
||||
@@ -300,21 +236,25 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
}
|
||||
knowledgeAttach.setContent(content);
|
||||
knowledgeAttach.setCreateTime(new Date());
|
||||
if (ObjectUtil.isNotEmpty(uploadDto) && ObjectUtil.isNotEmpty(uploadDto.getOssId())) {
|
||||
knowledgeAttach.setOssId(uploadDto.getOssId());
|
||||
//只有pdf文件 才需要拆解图片和分析图片内容
|
||||
if (FileType.PDF.equals(knowledgeAttach.getDocType())) {
|
||||
knowledgeAttach.setPicStatus(DealStatus.STATUS_10);
|
||||
knowledgeAttach.setPicAnysStatus(DealStatus.STATUS_10);
|
||||
} else {
|
||||
knowledgeAttach.setPicStatus(DealStatus.STATUS_30);
|
||||
knowledgeAttach.setPicAnysStatus(DealStatus.STATUS_30);
|
||||
}
|
||||
//所有文件上传后,都需要同步到向量数据库
|
||||
knowledgeAttach.setVectorStatus(DealStatus.STATUS_10);
|
||||
}
|
||||
attachMapper.insert(knowledgeAttach);
|
||||
|
||||
// 通过kid查询知识库信息
|
||||
KnowledgeInfoVo knowledgeInfoVo = baseMapper.selectVoOne(Wrappers.<KnowledgeInfo>lambdaQuery()
|
||||
.eq(KnowledgeInfo::getId, kid));
|
||||
|
||||
// 通过向量模型查询模型信息
|
||||
ChatModelVo chatModelVo = chatModelService.selectModelByName(knowledgeInfoVo.getEmbeddingModelName());
|
||||
|
||||
StoreEmbeddingBo storeEmbeddingBo = new StoreEmbeddingBo();
|
||||
storeEmbeddingBo.setKid(kid);
|
||||
storeEmbeddingBo.setDocId(docId);
|
||||
storeEmbeddingBo.setFids(fids);
|
||||
storeEmbeddingBo.setChunkList(chunkList);
|
||||
storeEmbeddingBo.setVectorModelName(knowledgeInfoVo.getVectorModelName());
|
||||
storeEmbeddingBo.setEmbeddingModelName(knowledgeInfoVo.getEmbeddingModelName());
|
||||
storeEmbeddingBo.setApiKey(chatModelVo.getApiKey());
|
||||
storeEmbeddingBo.setBaseUrl(chatModelVo.getApiHost());
|
||||
vectorStoreService.storeEmbeddings(storeEmbeddingBo);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -331,79 +271,4 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 第一步 定时 拆解PDF文件中的图片
|
||||
*/
|
||||
//@Scheduled(fixedDelay = 15000) // 每3秒执行一次
|
||||
public void dealKnowledgeAttachPic() throws Exception {
|
||||
//处理 拆解PDF文件中的图片的记录
|
||||
List<KnowledgeAttach> knowledgeAttaches = attachMapper.selectList(
|
||||
new LambdaQueryWrapper<KnowledgeAttach>()
|
||||
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
|
||||
);
|
||||
log.info("===============拆解PDF文件中的图片 size = {}", knowledgeAttaches.size());
|
||||
if (ObjectUtil.isNotEmpty(knowledgeAttaches)) {
|
||||
for (KnowledgeAttach attachItem : knowledgeAttaches) {
|
||||
dealFileService.dealPicStatus(attachItem);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 第二步 定时 解析图片内容
|
||||
*/
|
||||
//@Scheduled(fixedDelay = 15000)
|
||||
public void dealKnowledgeAttachPicAnys() throws Exception {
|
||||
//获取未处理的图片记录
|
||||
List<KnowledgeAttachPic> knowledgeAttachPics = picMapper.selectList(
|
||||
new LambdaQueryWrapper<KnowledgeAttachPic>()
|
||||
.eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10)
|
||||
.last("LIMIT 20")
|
||||
);
|
||||
if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) {
|
||||
for (KnowledgeAttachPic picItem : knowledgeAttachPics) {
|
||||
dealFileService.dealPicAnysStatus(picItem);
|
||||
}
|
||||
}
|
||||
}
|
||||
/**
|
||||
* 第三步 定时 处理 附件上传后上传向量数据库
|
||||
*/
|
||||
//@Scheduled(fixedDelay = 30000) // 每3秒执行一次
|
||||
public void dealKnowledgeAttachVector() throws Exception {
|
||||
//处理 需要上传向量数据库的记录
|
||||
List<KnowledgeAttach> knowledgeAttaches = attachMapper.selectList(
|
||||
new LambdaQueryWrapper<KnowledgeAttach>()
|
||||
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
|
||||
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
|
||||
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
|
||||
);
|
||||
log.info("===============上传向量数据库 size = {}", knowledgeAttaches.size());
|
||||
if (ObjectUtil.isNotEmpty(knowledgeAttaches)) {
|
||||
for (KnowledgeAttach attachItem : knowledgeAttaches) {
|
||||
dealFileService.dealVectorStatus(attachItem);
|
||||
}
|
||||
}
|
||||
}
|
||||
/**
|
||||
* 第四步 定时 处理 失败数据
|
||||
*/
|
||||
//@Scheduled(fixedDelay = 30 * 60 * 1000)
|
||||
public void dealKnowledge40Status() throws Exception {
|
||||
//拆解PDF失败 重新设置状态
|
||||
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
|
||||
.set(KnowledgeAttach::getPicStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_40));
|
||||
//将图片分析失败的数据 重新设置状态
|
||||
picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>()
|
||||
.set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_40));
|
||||
//上传向量库失败 重新设置状态
|
||||
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
|
||||
.set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_40));
|
||||
}
|
||||
@Scheduled(fixedDelay = 180000) // 3分钟执行一次
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user