diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java index 917652c4..4b6c01dc 100644 --- a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java +++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java @@ -1,13 +1,11 @@ package org.ruoyi.service.impl; -import cn.hutool.core.util.RandomUtil; import com.google.protobuf.ServiceException; import dev.langchain4j.data.embedding.Embedding; import dev.langchain4j.data.segment.TextSegment; import dev.langchain4j.model.embedding.EmbeddingModel; import dev.langchain4j.model.ollama.OllamaEmbeddingModel; import dev.langchain4j.model.openai.OpenAiEmbeddingModel; -import dev.langchain4j.model.output.Response; import dev.langchain4j.store.embedding.EmbeddingMatch; import dev.langchain4j.store.embedding.EmbeddingSearchRequest; import dev.langchain4j.store.embedding.EmbeddingStore; @@ -31,6 +29,7 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; /** * 向量库管理 @@ -131,31 +130,7 @@ public class VectorStoreServiceImpl implements VectorStoreService { createSchema(kid,modelName); // 根据条件删除向量数据 Filter simpleFilter = new IsEqualTo("kid", kid); - removeByFilter(simpleFilter); - } - - public void removeByFilter(Filter filter) { - List dummyVector = new ArrayList<>(); - // TODO 模型维度 - int dimension = 1024; - for (int i = 0; i < dimension; i++) { - dummyVector.add(0.0f); - } - Embedding dummyEmbedding = Embedding.from(dummyVector); - EmbeddingSearchRequest request = EmbeddingSearchRequest.builder() - .queryEmbedding(dummyEmbedding) - .filter(filter) - .maxResults(10000) - .build(); - // 搜索 - List idsToDelete = embeddingStore.search(request) - .matches().stream() - .map(EmbeddingMatch::embeddingId) - .collect(Collectors.toList()); - // 删除 - if (!idsToDelete.isEmpty()) { - embeddingStore.removeAll(idsToDelete); - } + embeddingStore.removeAll(simpleFilter); } @Override diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/DealFileService.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/DealFileService.java deleted file mode 100644 index 2951c20f..00000000 --- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/DealFileService.java +++ /dev/null @@ -1,385 +0,0 @@ -package org.ruoyi.chat.service.knowledge; - -import cn.hutool.core.util.ObjectUtil; -import cn.hutool.core.util.RandomUtil; -import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; -import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper; -import com.baomidou.mybatisplus.core.toolkit.Wrappers; -import java.util.Collection; -import java.util.Date; -import java.util.List; -import java.util.stream.Collectors; -import lombok.RequiredArgsConstructor; -import org.ruoyi.chain.loader.ResourceLoaderFactory; -import org.ruoyi.constant.DealStatus; -import org.ruoyi.domain.KnowledgeAttach; -import org.ruoyi.domain.KnowledgeAttachPic; -import org.ruoyi.domain.KnowledgeFragment; -import org.ruoyi.domain.KnowledgeInfo; -import org.ruoyi.domain.PdfFileContentResult; -import org.ruoyi.domain.bo.StoreEmbeddingBo; -import org.ruoyi.domain.vo.ChatModelVo; -import org.ruoyi.domain.vo.KnowledgeAttachVo; -import org.ruoyi.domain.vo.KnowledgeInfoVo; -import org.ruoyi.mapper.KnowledgeAttachMapper; -import org.ruoyi.mapper.KnowledgeAttachPicMapper; -import org.ruoyi.mapper.KnowledgeFragmentMapper; -import org.ruoyi.mapper.KnowledgeInfoMapper; -import org.ruoyi.service.IChatModelService; -import org.ruoyi.service.VectorStoreService; -import org.ruoyi.service.impl.PdfImageExtractServiceImpl; -import org.ruoyi.system.domain.vo.SysOssVo; -import org.ruoyi.system.service.ISysOssService; -import org.ruoyi.utils.ZipUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.scheduling.annotation.Async; -import org.springframework.stereotype.Service; -import org.springframework.web.multipart.MultipartFile; - -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; - -/** - * @Description: - * @Date: 2025/5/15 下午4:29 - */ -@Service -@RequiredArgsConstructor -public class DealFileService { - private static final Logger log = LoggerFactory.getLogger(DealFileService.class); - - private final KnowledgeInfoMapper baseMapper; - - private final VectorStoreService vectorStoreService; - - private final ResourceLoaderFactory resourceLoaderFactory; - - private final KnowledgeFragmentMapper fragmentMapper; - - private final KnowledgeAttachMapper attachMapper; - - private final IChatModelService chatModelService; - - private final ISysOssService ossService; - -// private final PdfImageExtractService pdfImageExtractService; - - private final KnowledgeAttachPicMapper picMapper; - - @Value("${pdf.extract.service.url}") - private String serviceUrl; - @Value("${pdf.extract.ai-api.url}") - private String aiApiUrl; - @Value("${pdf.extract.ai-api.key}") - private String aiApiKey; - - - @Async - public void dealVectorStatus(KnowledgeAttach attachItem) throws Exception { - try { - //锁定数据 更改VectorStatus 到进行中 - if (attachMapper.update(new LambdaUpdateWrapper() - .set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_20) - .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30) - .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30) - .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttach::getId, attachItem.getId()) - ) == 0) { - return; - } - List knowledgeFragments = fragmentMapper.selectList( - new LambdaQueryWrapper() - .eq(KnowledgeFragment::getKid, attachItem.getKid()) - .eq(KnowledgeFragment::getDocId, attachItem.getDocId()) - ); - if (ObjectUtil.isEmpty(knowledgeFragments)) { - throw new Exception("文件段落为空"); - } - List fids = knowledgeFragments.stream() - .map(KnowledgeFragment::getFid) - .collect(Collectors.toList()); - if (ObjectUtil.isEmpty(fids)) { - throw new Exception("fids 为空"); - } - List chunkList = knowledgeFragments.stream() - .map(KnowledgeFragment::getContent) - .collect(Collectors.toList()); - - if (ObjectUtil.isEmpty(chunkList)) { - throw new Exception("chunkList 为空"); - } - // 通过kid查询知识库信息 - KnowledgeInfoVo knowledgeInfoVo = baseMapper.selectVoOne(Wrappers.lambdaQuery() - .eq(KnowledgeInfo::getId, attachItem.getKid())); - // 通过向量模型查询模型信息 - ChatModelVo chatModelVo = chatModelService.selectModelByName( - knowledgeInfoVo.getEmbeddingModelName()); - - StoreEmbeddingBo storeEmbeddingBo = new StoreEmbeddingBo(); - storeEmbeddingBo.setKid(attachItem.getKid()); - storeEmbeddingBo.setDocId(attachItem.getDocId()); - storeEmbeddingBo.setFids(fids); - storeEmbeddingBo.setChunkList(chunkList); - storeEmbeddingBo.setVectorModelName(knowledgeInfoVo.getVectorModelName()); - storeEmbeddingBo.setEmbeddingModelName(knowledgeInfoVo.getEmbeddingModelName()); - storeEmbeddingBo.setApiKey(chatModelVo.getApiKey()); - storeEmbeddingBo.setBaseUrl(chatModelVo.getApiHost()); - vectorStoreService.storeEmbeddings(storeEmbeddingBo); - - //设置处理完成 - attachMapper.update(new LambdaUpdateWrapper() - .set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_30) - .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30) - .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30) - .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_20) - .eq(KnowledgeAttach::getId, attachItem.getId())); - } catch (Exception e) { - //设置处理失败 - attachMapper.update(new LambdaUpdateWrapper() - .set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_40) - .set(KnowledgeAttach::getRemark, attachItem.getRemark() + e.getMessage()) - .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30) - .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30) - .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_20) - .eq(KnowledgeAttach::getId, attachItem.getId())); - throw new RuntimeException(e); - } - } - - @Async - public void dealPicStatus(KnowledgeAttach attachItem) throws Exception { - try { - //锁定数据 更改picStatus 到进行中 - if (attachMapper.update(new LambdaUpdateWrapper() - .set(KnowledgeAttach::getPicStatus, DealStatus.STATUS_20) - .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttach::getId, attachItem.getId()) - ) == 0) { - return; - } - //获取附件 - if (ObjectUtil.isEmpty(attachItem.getOssId())) { - log.error("==========OssId 为空,attachItem={}", attachItem); - throw new Exception("OssId 为空"); - } - //获取oss文件 - MultipartFile multipartFile = ossService.downloadByFile(attachItem.getOssId()); - //拆解出图片ZIP - PdfImageExtractServiceImpl pdfImageExtractService = new PdfImageExtractServiceImpl(serviceUrl, - aiApiUrl, aiApiKey); - byte[] pngs = pdfImageExtractService.extractImages(multipartFile, "png", true); - //解压zip,得到图片文件 - MultipartFile[] multipartFiles = ZipUtils.unzipToMultipartFiles(pngs); - //上传文件到OSS,写入表 - for (MultipartFile file : multipartFiles) { - //先查找是否有相同图片名称,先做删除 - List knowledgeAttachPics = picMapper.selectList( - new LambdaQueryWrapper() - .eq(KnowledgeAttachPic::getKid, attachItem.getKid()) - .eq(KnowledgeAttachPic::getAid, attachItem.getId()) - .eq(KnowledgeAttachPic::getDocName, file.getOriginalFilename()) - ); - if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) { - Collection ossIds = knowledgeAttachPics.stream() - .map(KnowledgeAttachPic::getOssId) - .collect(Collectors.toList()); - ossService.deleteWithValidByIds(ossIds, false); - List collect = knowledgeAttachPics.stream().map(KnowledgeAttachPic::getId) - .collect(Collectors.toList()); - picMapper.deleteByIds(collect); - } - - SysOssVo upload = ossService.upload(file); - KnowledgeAttachPic entity = new KnowledgeAttachPic(); - entity.setKid(attachItem.getKid()); - entity.setAid(String.valueOf(attachItem.getId())); - entity.setDocName(file.getOriginalFilename()); - entity.setDocType( - file.getOriginalFilename().substring(file.getOriginalFilename().lastIndexOf(".") + 1)); - entity.setOssId(upload.getOssId()); - int[] ints = extractPageNumbers(file.getOriginalFilename()); - if (ObjectUtil.isNotEmpty(ints)) { - assert ints != null; - if (ints.length == 2) { - entity.setPageNum(ints[0]); - entity.setIndexNum(ints[1]); - } - } - picMapper.insert(entity); - } - - //设置处理完成 - attachMapper.update(new LambdaUpdateWrapper() - .set(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30) - .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_20) - .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttach::getId, attachItem.getId())); - } catch (Exception e) { - //设置处理失败 - attachMapper.update(new LambdaUpdateWrapper() - .set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_40) - .set(KnowledgeAttach::getRemark, attachItem.getRemark() + e.getMessage()) - .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_20) - .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttach::getId, attachItem.getId())); - throw new RuntimeException(e); - } - - } - - - @Async - public void dealPicAnysStatus(KnowledgeAttachPic picItem) throws Exception { - try { - //锁定数据 更改 getPicAnysStatus 到进行中 - if (picMapper.update(new LambdaUpdateWrapper() - .set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_20) - .eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttachPic::getId, picItem.getId()) - ) == 0) { - return; - } - SysOssVo ossVo = ossService.getById(picItem.getOssId()); - if (ObjectUtil.isNotEmpty(ossVo)) { - String fileStr = ossService.downloadByByte(picItem.getOssId()); - //调用第三方 分析图片内容 - PdfImageExtractServiceImpl pdfImageExtractService = new PdfImageExtractServiceImpl( - serviceUrl, - aiApiUrl, aiApiKey); - List pdfFileContentResults = pdfImageExtractService.dealFileContent( - new String[]{fileStr}); - if (ObjectUtil.isNotEmpty(pdfFileContentResults)) { - for (PdfFileContentResult resultItem : pdfFileContentResults) { - //图片解析内容回写到pic表 - picMapper.update(new LambdaUpdateWrapper() - .set(KnowledgeAttachPic::getContent, parseContent(resultItem.getContent())) - .set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_30) - .eq(KnowledgeAttachPic::getId, picItem.getId())); - //将图片解析内容 写入段落表 fragment - KnowledgeAttachVo knowledgeAttachVo = attachMapper.selectVoById(picItem.getAid()); - if (ObjectUtil.isNotEmpty(knowledgeAttachVo)) { - String fid = RandomUtil.randomString(10); - KnowledgeFragment knowledgeFragment = new KnowledgeFragment(); - knowledgeFragment.setKid(knowledgeAttachVo.getKid()); - knowledgeFragment.setDocId(knowledgeAttachVo.getDocId()); - knowledgeFragment.setFid(fid); - knowledgeFragment.setIdx(0); - knowledgeFragment.setContent(parseContent(resultItem.getContent())); - knowledgeFragment.setCreateTime(new Date()); - fragmentMapper.insert(knowledgeFragment); - - //更新attach表,需要所有图片都处理完毕 - // 查询非30状态(完成状态)的记录数量 - long nonStatus30Count = picMapper.selectCount( - new LambdaQueryWrapper() - .ne(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_30) - .eq(KnowledgeAttachPic::getAid, picItem.getAid()) - ); - if (nonStatus30Count == 0) { - // 执行表更新操作 - attachMapper.update(new LambdaUpdateWrapper() - .set(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30) - .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30) - .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttach::getId, picItem.getAid())); - } - } - } - } - } - } catch (Exception e) { - //失败 - picMapper.update(new LambdaUpdateWrapper() - .set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_40) - .set(KnowledgeAttachPic::getRemark, picItem.getRemark() + e.getMessage()) - .eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_20) - .eq(KnowledgeAttachPic::getId, picItem.getId())); - throw new RuntimeException(e); - } - } - - - /** - * 从文件名中提取page后面的两个数字 - * - * @param fileName 文件名 - * @return 包含两个数字的数组,如果未找到则返回null - */ - public static int[] extractPageNumbers(String fileName) { - // 查找"page_"的位置 - int pageIndex = fileName.indexOf("page_"); - - if (pageIndex == -1) { - return null; - } - - // 从"page_"后开始截取 - String afterPage = fileName.substring(pageIndex + 5); - - // 按下划线分割 - String[] parts = afterPage.split("_"); - - if (parts.length >= 2) { - try { - // 提取两个数字 - int firstNumber = Integer.parseInt(parts[0]); - - // 对于第二个数字,需要去掉可能的文件扩展名 - String secondPart = parts[1]; - int dotIndex = secondPart.indexOf("."); - if (dotIndex != -1) { - secondPart = secondPart.substring(0, dotIndex); - } - - int secondNumber = Integer.parseInt(secondPart); - - return new int[]{firstNumber, secondNumber}; - } catch (NumberFormatException e) { - return null; - } - } - - return null; - } - - public static String parseContent(String jsonString) { - try { - // 创建ObjectMapper实例 - ObjectMapper objectMapper = new ObjectMapper(); - - // 解析JSON字符串 - JsonNode rootNode = objectMapper.readTree(jsonString); - - // 获取choices数组的第一个元素 - JsonNode choicesNode = rootNode.get("choices"); - if (choicesNode != null && choicesNode.isArray() && choicesNode.size() > 0) { - // 获取第一个choice - JsonNode firstChoice = choicesNode.get(0); - - // 获取message节点 - JsonNode messageNode = firstChoice.get("message"); - if (messageNode != null) { - // 获取content字段的值 - JsonNode contentNode = messageNode.get("content"); - if (contentNode != null) { - return contentNode.asText(); - } - } - } - - return "无法找到content内容"; - } catch (Exception e) { - e.printStackTrace(); - return "解析JSON时发生错误: " + e.getMessage(); - } - } - - -} diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/KnowledgeInfoServiceImpl.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/KnowledgeInfoServiceImpl.java index 06ad06b6..914256e3 100644 --- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/KnowledgeInfoServiceImpl.java +++ b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/KnowledgeInfoServiceImpl.java @@ -4,29 +4,21 @@ import cn.hutool.core.collection.CollUtil; import cn.hutool.core.util.ObjectUtil; import cn.hutool.core.util.RandomUtil; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; -import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper; -import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper; import com.baomidou.mybatisplus.core.toolkit.Wrappers; import com.baomidou.mybatisplus.extension.plugins.pagination.Page; -import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; import org.ruoyi.chain.loader.ResourceLoader; import org.ruoyi.chain.loader.ResourceLoaderFactory; import org.ruoyi.common.core.domain.model.LoginUser; import org.ruoyi.common.core.utils.MapstructUtils; import org.ruoyi.common.core.utils.StringUtils; import org.ruoyi.common.satoken.utils.LoginHelper; -import org.ruoyi.constant.DealStatus; -import org.ruoyi.constant.FileType; import org.ruoyi.core.page.PageQuery; import org.ruoyi.core.page.TableDataInfo; -import org.ruoyi.domain.ChatModel; import org.ruoyi.domain.KnowledgeAttach; import org.ruoyi.domain.KnowledgeAttachPic; import org.ruoyi.domain.KnowledgeFragment; import org.ruoyi.domain.KnowledgeInfo; -import org.ruoyi.domain.PdfFileContentResult; import org.ruoyi.domain.bo.KnowledgeInfoBo; import org.ruoyi.domain.bo.KnowledgeInfoUploadBo; import org.ruoyi.domain.bo.StoreEmbeddingBo; @@ -34,28 +26,21 @@ import org.ruoyi.domain.vo.ChatModelVo; import org.ruoyi.domain.vo.KnowledgeAttachVo; import org.ruoyi.domain.vo.KnowledgeInfoVo; import org.ruoyi.mapper.KnowledgeAttachMapper; -import org.ruoyi.mapper.KnowledgeAttachPicMapper; import org.ruoyi.mapper.KnowledgeFragmentMapper; import org.ruoyi.mapper.KnowledgeInfoMapper; import org.ruoyi.service.IChatModelService; -import org.ruoyi.service.PdfImageExtractService; -import org.ruoyi.service.VectorStoreService; import org.ruoyi.service.IKnowledgeInfoService; -import org.ruoyi.service.impl.PdfImageExtractServiceImpl; -import org.ruoyi.system.domain.vo.SysOssVo; -import org.ruoyi.utils.ZipUtils; +import org.ruoyi.service.VectorStoreService; +import org.ruoyi.system.service.ISysOssService; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.scheduling.annotation.Async; -import org.springframework.scheduling.annotation.Scheduled; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; import org.springframework.web.multipart.MultipartFile; -import org.ruoyi.system.service.ISysOssService; import java.io.IOException; import java.util.*; +import java.util.stream.Collectors; /** @@ -69,6 +54,7 @@ import java.util.*; public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService { private static final Logger log = LoggerFactory.getLogger(KnowledgeInfoServiceImpl.class); + private final KnowledgeInfoMapper baseMapper; private final VectorStoreService vectorStoreService; @@ -83,19 +69,6 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService { private final ISysOssService ossService; -// private final PdfImageExtractService pdfImageExtractService; - - private final KnowledgeAttachPicMapper picMapper; - - private final DealFileService dealFileService; - - @Value("${pdf.extract.service.url}") - private String serviceUrl; - @Value("${pdf.extract.ai-api.url}") - private String aiApiUrl; - @Value("${pdf.extract.ai-api.key}") - private String aiApiKey; - /** * 查询知识库 */ @@ -207,45 +180,16 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService { @Override @Transactional(rollbackFor = Exception.class) public void removeKnowledge(String id) { - Map map = new HashMap<>(); - map.put("kid", id); + Map map = new HashMap<>(); + map.put("kid",id); List knowledgeInfoList = baseMapper.selectVoByMap(map); check(knowledgeInfoList); // 删除向量库信息 - knowledgeInfoList.forEach(knowledgeInfoVo -> { - vectorStoreService.removeByKid(String.valueOf(knowledgeInfoVo.getId()), - knowledgeInfoVo.getVectorModelName()); - }); +// knowledgeInfoList.forEach(knowledgeInfoVo -> { +// vectorStoreService.removeByKid(String.valueOf(knowledgeInfoVo.getId()),knowledgeInfoVo.getVectorModelName()); +// }); // 删除附件和知识片段 fragmentMapper.deleteByMap(map); - List knowledgeAttachVos = attachMapper.selectVoByMap(map); - if (ObjectUtil.isNotEmpty(knowledgeAttachVos)) { - Collection ossIds = knowledgeAttachVos.stream() - .map(KnowledgeAttachVo::getOssId) - .collect(Collectors.toList()); - //删除oss - ossService.deleteWithValidByIds(ossIds, false); - - //删除图片oss - List knowledgeAttachPics = picMapper.selectList( - new LambdaQueryWrapper() - .in(KnowledgeAttachPic::getKid, - knowledgeAttachVos.stream().map(KnowledgeAttachVo::getKid) - .collect(Collectors.toList())) - .in(KnowledgeAttachPic::getAid, - knowledgeAttachVos.stream().map(KnowledgeAttachVo::getId) - .collect(Collectors.toList())) - ); - if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) { - Collection tossIds = knowledgeAttachPics.stream() - .map(KnowledgeAttachPic::getOssId) - .collect(Collectors.toList()); - ossService.deleteWithValidByIds(tossIds, false); - List collect = knowledgeAttachPics.stream().map(KnowledgeAttachPic::getId) - .collect(Collectors.toList()); - picMapper.deleteByIds(collect); - } - } attachMapper.deleteByMap(map); // 删除知识库 baseMapper.deleteByMap(map); @@ -257,11 +201,6 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService { } public void storeContent(MultipartFile file, String kid) { - if (file == null || file.isEmpty()) { - throw new IllegalArgumentException("File cannot be null or empty"); - } - - SysOssVo uploadDto = null; String fileName = file.getOriginalFilename(); List chunkList = new ArrayList<>(); KnowledgeAttach knowledgeAttach = new KnowledgeAttach(); @@ -269,18 +208,15 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService { String docId = RandomUtil.randomString(10); knowledgeAttach.setDocId(docId); knowledgeAttach.setDocName(fileName); - knowledgeAttach.setDocType(fileName.substring(fileName.lastIndexOf(".") + 1)); + knowledgeAttach.setDocType(fileName.substring(fileName.lastIndexOf(".")+1)); String content = ""; - ResourceLoader resourceLoader = resourceLoaderFactory.getLoaderByFileType( - knowledgeAttach.getDocType()); + ResourceLoader resourceLoader = resourceLoaderFactory.getLoaderByFileType(knowledgeAttach.getDocType()); List fids = new ArrayList<>(); try { content = resourceLoader.getContent(file.getInputStream()); chunkList = resourceLoader.getChunkList(content, kid); List knowledgeFragmentList = new ArrayList<>(); if (CollUtil.isNotEmpty(chunkList)) { - // Upload file to OSS - uploadDto = ossService.upload(file); for (int i = 0; i < chunkList.size(); i++) { String fid = RandomUtil.randomString(10); fids.add(fid); @@ -300,21 +236,25 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService { } knowledgeAttach.setContent(content); knowledgeAttach.setCreateTime(new Date()); - if (ObjectUtil.isNotEmpty(uploadDto) && ObjectUtil.isNotEmpty(uploadDto.getOssId())) { - knowledgeAttach.setOssId(uploadDto.getOssId()); - //只有pdf文件 才需要拆解图片和分析图片内容 - if (FileType.PDF.equals(knowledgeAttach.getDocType())) { - knowledgeAttach.setPicStatus(DealStatus.STATUS_10); - knowledgeAttach.setPicAnysStatus(DealStatus.STATUS_10); - } else { - knowledgeAttach.setPicStatus(DealStatus.STATUS_30); - knowledgeAttach.setPicAnysStatus(DealStatus.STATUS_30); - } - //所有文件上传后,都需要同步到向量数据库 - knowledgeAttach.setVectorStatus(DealStatus.STATUS_10); - } attachMapper.insert(knowledgeAttach); + // 通过kid查询知识库信息 + KnowledgeInfoVo knowledgeInfoVo = baseMapper.selectVoOne(Wrappers.lambdaQuery() + .eq(KnowledgeInfo::getId, kid)); + + // 通过向量模型查询模型信息 + ChatModelVo chatModelVo = chatModelService.selectModelByName(knowledgeInfoVo.getEmbeddingModelName()); + + StoreEmbeddingBo storeEmbeddingBo = new StoreEmbeddingBo(); + storeEmbeddingBo.setKid(kid); + storeEmbeddingBo.setDocId(docId); + storeEmbeddingBo.setFids(fids); + storeEmbeddingBo.setChunkList(chunkList); + storeEmbeddingBo.setVectorModelName(knowledgeInfoVo.getVectorModelName()); + storeEmbeddingBo.setEmbeddingModelName(knowledgeInfoVo.getEmbeddingModelName()); + storeEmbeddingBo.setApiKey(chatModelVo.getApiKey()); + storeEmbeddingBo.setBaseUrl(chatModelVo.getApiHost()); + vectorStoreService.storeEmbeddings(storeEmbeddingBo); } /** @@ -331,79 +271,4 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService { } } - /** - * 第一步 定时 拆解PDF文件中的图片 - */ - //@Scheduled(fixedDelay = 15000) // 每3秒执行一次 - public void dealKnowledgeAttachPic() throws Exception { - //处理 拆解PDF文件中的图片的记录 - List knowledgeAttaches = attachMapper.selectList( - new LambdaQueryWrapper() - .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10) - ); - log.info("===============拆解PDF文件中的图片 size = {}", knowledgeAttaches.size()); - if (ObjectUtil.isNotEmpty(knowledgeAttaches)) { - for (KnowledgeAttach attachItem : knowledgeAttaches) { - dealFileService.dealPicStatus(attachItem); - } - } - } - - /** - * 第二步 定时 解析图片内容 - */ - //@Scheduled(fixedDelay = 15000) - public void dealKnowledgeAttachPicAnys() throws Exception { - //获取未处理的图片记录 - List knowledgeAttachPics = picMapper.selectList( - new LambdaQueryWrapper() - .eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10) - .last("LIMIT 20") - ); - if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) { - for (KnowledgeAttachPic picItem : knowledgeAttachPics) { - dealFileService.dealPicAnysStatus(picItem); - } - } - } - /** - * 第三步 定时 处理 附件上传后上传向量数据库 - */ - //@Scheduled(fixedDelay = 30000) // 每3秒执行一次 - public void dealKnowledgeAttachVector() throws Exception { - //处理 需要上传向量数据库的记录 - List knowledgeAttaches = attachMapper.selectList( - new LambdaQueryWrapper() - .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30) - .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30) - .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10) - ); - log.info("===============上传向量数据库 size = {}", knowledgeAttaches.size()); - if (ObjectUtil.isNotEmpty(knowledgeAttaches)) { - for (KnowledgeAttach attachItem : knowledgeAttaches) { - dealFileService.dealVectorStatus(attachItem); - } - } - } - /** - * 第四步 定时 处理 失败数据 - */ - //@Scheduled(fixedDelay = 30 * 60 * 1000) - public void dealKnowledge40Status() throws Exception { - //拆解PDF失败 重新设置状态 - attachMapper.update(new LambdaUpdateWrapper() - .set(KnowledgeAttach::getPicStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_40)); - //将图片分析失败的数据 重新设置状态 - picMapper.update(new LambdaUpdateWrapper() - .set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_40)); - //上传向量库失败 重新设置状态 - attachMapper.update(new LambdaUpdateWrapper() - .set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_40)); - } - @Scheduled(fixedDelay = 180000) // 3分钟执行一次 }