pdf文件解析成异步处理

This commit is contained in:
zhouweiyi
2025-05-14 15:41:57 +08:00
parent 52e0feda01
commit dc9bf3e25d
10 changed files with 866 additions and 660 deletions

View File

@@ -3,6 +3,7 @@ package org.ruoyi;
import org.springframework.boot.SpringApplication; import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication; import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.context.metrics.buffering.BufferingApplicationStartup; import org.springframework.boot.context.metrics.buffering.BufferingApplicationStartup;
import org.springframework.scheduling.annotation.EnableScheduling;
/** /**
* 启动程序 * 启动程序
@@ -10,6 +11,7 @@ import org.springframework.boot.context.metrics.buffering.BufferingApplicationSt
* @author Lion Li * @author Lion Li
*/ */
@SpringBootApplication @SpringBootApplication
@EnableScheduling
public class RuoYiAIApplication { public class RuoYiAIApplication {
public static void main(String[] args) { public static void main(String[] args) {

View File

@@ -0,0 +1,16 @@
package org.ruoyi.constant;
/**
 * Status codes for a staged background-processing step.
 *
 * <p>The three values are: 10 = not started, 20 = in progress, 30 = finished.
 * The constants are kept as boxed {@code Integer} so they can be passed directly
 * to APIs that expect an {@code Integer} value.
 *
 * <p>Created: 2025/5/14 14:04
 */
public final class DealStatus {

    /** Not started. */
    public static final Integer STATUS_10 = 10;

    /** In progress. */
    public static final Integer STATUS_20 = 20;

    /** Finished. */
    public static final Integer STATUS_30 = 30;

    private DealStatus() {
        // Constants holder only; never instantiated.
    }
}

View File

@@ -58,4 +58,26 @@ public class KnowledgeAttach extends BaseEntity {
private String remark; private String remark;
/**
* 对象存储主键
*/
private Long ossId;
/**
* 拆解图片状态10未开始20进行中30已完成
*/
private Integer picStatus;
/**
* 分析图片状态10未开始20进行中30已完成
*/
private Integer picAnysStatus;
/**
* 写入向量数据库状态10未开始20进行中30已完成
*/
private Integer vectorStatus;
} }

View File

@@ -62,5 +62,30 @@ public class KnowledgeAttachBo extends BaseEntity {
@NotBlank(message = "备注不能为空", groups = {AddGroup.class, EditGroup.class}) @NotBlank(message = "备注不能为空", groups = {AddGroup.class, EditGroup.class})
private String remark; private String remark;
/**
* 对象存储主键
*/
@NotNull(message = "对象存储主键不能为空", groups = {AddGroup.class, EditGroup.class})
private Long ossId;
/**
* 拆解图片状态10未开始20进行中30已完成
*/
@NotNull(message = "拆解图片状态10未开始20进行中30已完成不能为空", groups = { AddGroup.class, EditGroup.class })
private Integer picStatus;
/**
* 分析图片状态10未开始20进行中30已完成
*/
@NotNull(message = "分析图片状态10未开始20进行中30已完成不能为空", groups = { AddGroup.class, EditGroup.class })
private Integer picAnysStatus;
/**
* 写入向量数据库状态10未开始20进行中30已完成
*/
@NotNull(message = "写入向量数据库状态10未开始20进行中30已完成不能为空", groups = { AddGroup.class, EditGroup.class })
private Integer vectorStatus;
} }

View File

@@ -10,8 +10,6 @@ import java.io.Serial;
import java.io.Serializable; import java.io.Serializable;
/** /**
* 知识库附件视图对象 knowledge_attach * 知识库附件视图对象 knowledge_attach
* *
@@ -68,5 +66,29 @@ public class KnowledgeAttachVo implements Serializable {
@ExcelProperty(value = "备注") @ExcelProperty(value = "备注")
private String remark; private String remark;
/**
* 对象存储主键
*/
@ExcelProperty(value = "对象存储主键")
private Long ossId;
/**
* 拆解图片状态10未开始20进行中30已完成
*/
@ExcelProperty(value = "拆解图片状态10未开始20进行中30已完成")
private Integer picStatus;
/**
* 分析图片状态10未开始20进行中30已完成
*/
@ExcelProperty(value = "分析图片状态10未开始20进行中30已完成")
private Integer picAnysStatus;
/**
* 写入向量数据库状态10未开始20进行中30已完成
*/
@ExcelProperty(value = "写入向量数据库状态10未开始20进行中30已完成")
private Integer vectorStatus;
} }

View File

@@ -61,5 +61,5 @@ public interface IKnowledgeInfoService {
/** /**
* 上传附件 * 上传附件
*/ */
void upload(KnowledgeInfoUploadBo bo); void upload(KnowledgeInfoUploadBo bo) throws Exception;
} }

View File

@@ -118,7 +118,7 @@ public class KnowledgeController extends BaseController {
* 上传知识库附件 * 上传知识库附件
*/ */
@PostMapping(value = "/attach/upload") @PostMapping(value = "/attach/upload")
public R<String> upload(KnowledgeInfoUploadBo bo) { public R<String> upload(KnowledgeInfoUploadBo bo) throws Exception {
knowledgeInfoService.upload(bo); knowledgeInfoService.upload(bo);
return R.ok("上传知识库附件成功!"); return R.ok("上传知识库附件成功!");
} }

View File

@@ -1,11 +1,14 @@
package org.ruoyi.chat.service.knowledge; package org.ruoyi.chat.service.knowledge;
import cn.hutool.core.collection.CollUtil; import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.util.ObjectUtil;
import cn.hutool.core.util.RandomUtil; import cn.hutool.core.util.RandomUtil;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper; import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper;
import com.baomidou.mybatisplus.core.toolkit.Wrappers; import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page; import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import java.util.stream.Collectors;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import org.ruoyi.chain.loader.ResourceLoader; import org.ruoyi.chain.loader.ResourceLoader;
import org.ruoyi.chain.loader.ResourceLoaderFactory; import org.ruoyi.chain.loader.ResourceLoaderFactory;
@@ -13,6 +16,8 @@ import org.ruoyi.common.core.domain.model.LoginUser;
import org.ruoyi.common.core.utils.MapstructUtils; import org.ruoyi.common.core.utils.MapstructUtils;
import org.ruoyi.common.core.utils.StringUtils; import org.ruoyi.common.core.utils.StringUtils;
import org.ruoyi.common.satoken.utils.LoginHelper; import org.ruoyi.common.satoken.utils.LoginHelper;
import org.ruoyi.constant.DealStatus;
import org.ruoyi.constant.FileType;
import org.ruoyi.core.page.PageQuery; import org.ruoyi.core.page.PageQuery;
import org.ruoyi.core.page.TableDataInfo; import org.ruoyi.core.page.TableDataInfo;
import org.ruoyi.domain.ChatModel; import org.ruoyi.domain.ChatModel;
@@ -30,11 +35,15 @@ import org.ruoyi.mapper.KnowledgeInfoMapper;
import org.ruoyi.service.IChatModelService; import org.ruoyi.service.IChatModelService;
import org.ruoyi.service.VectorStoreService; import org.ruoyi.service.VectorStoreService;
import org.ruoyi.service.IKnowledgeInfoService; import org.ruoyi.service.IKnowledgeInfoService;
import org.ruoyi.system.domain.vo.SysOssVo;
import org.ruoyi.system.service.ISysOssService;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional; import org.springframework.transaction.annotation.Transactional;
import org.springframework.web.multipart.MultipartFile; import org.springframework.web.multipart.MultipartFile;
import org.springframework.scheduling.annotation.Async;
import java.io.IOException; import java.io.IOException;
import java.util.*; import java.util.*;
@@ -62,6 +71,8 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
private final IChatModelService chatModelService; private final IChatModelService chatModelService;
private final ISysOssService ossService;
/** /**
* 查询知识库 * 查询知识库
*/ */
@@ -96,14 +107,18 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
lqw.eq(bo.getUid() != null, KnowledgeInfo::getUid, bo.getUid()); lqw.eq(bo.getUid() != null, KnowledgeInfo::getUid, bo.getUid());
lqw.like(StringUtils.isNotBlank(bo.getKname()), KnowledgeInfo::getKname, bo.getKname()); lqw.like(StringUtils.isNotBlank(bo.getKname()), KnowledgeInfo::getKname, bo.getKname());
lqw.eq(bo.getShare() != null, KnowledgeInfo::getShare, bo.getShare()); lqw.eq(bo.getShare() != null, KnowledgeInfo::getShare, bo.getShare());
lqw.eq(StringUtils.isNotBlank(bo.getDescription()), KnowledgeInfo::getDescription, bo.getDescription()); lqw.eq(StringUtils.isNotBlank(bo.getDescription()), KnowledgeInfo::getDescription,
lqw.eq(StringUtils.isNotBlank(bo.getKnowledgeSeparator()), KnowledgeInfo::getKnowledgeSeparator, bo.getKnowledgeSeparator()); bo.getDescription());
lqw.eq(StringUtils.isNotBlank(bo.getQuestionSeparator()), KnowledgeInfo::getQuestionSeparator, bo.getQuestionSeparator()); lqw.eq(StringUtils.isNotBlank(bo.getKnowledgeSeparator()), KnowledgeInfo::getKnowledgeSeparator,
bo.getKnowledgeSeparator());
lqw.eq(StringUtils.isNotBlank(bo.getQuestionSeparator()), KnowledgeInfo::getQuestionSeparator,
bo.getQuestionSeparator());
lqw.eq(bo.getOverlapChar() != null, KnowledgeInfo::getOverlapChar, bo.getOverlapChar()); lqw.eq(bo.getOverlapChar() != null, KnowledgeInfo::getOverlapChar, bo.getOverlapChar());
lqw.eq(bo.getRetrieveLimit() != null, KnowledgeInfo::getRetrieveLimit, bo.getRetrieveLimit()); lqw.eq(bo.getRetrieveLimit() != null, KnowledgeInfo::getRetrieveLimit, bo.getRetrieveLimit());
lqw.eq(bo.getTextBlockSize() != null, KnowledgeInfo::getTextBlockSize, bo.getTextBlockSize()); lqw.eq(bo.getTextBlockSize() != null, KnowledgeInfo::getTextBlockSize, bo.getTextBlockSize());
lqw.eq(StringUtils.isNotBlank(bo.getVector()), KnowledgeInfo::getVector, bo.getVector()); lqw.eq(StringUtils.isNotBlank(bo.getVector()), KnowledgeInfo::getVector, bo.getVector());
lqw.eq(StringUtils.isNotBlank(bo.getVectorModel()), KnowledgeInfo::getVectorModel, bo.getVectorModel()); lqw.eq(StringUtils.isNotBlank(bo.getVectorModel()), KnowledgeInfo::getVectorModel,
bo.getVectorModel());
return lqw; return lqw;
} }
@@ -192,6 +207,12 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
} }
public void storeContent(MultipartFile file, String kid) { public void storeContent(MultipartFile file, String kid) {
if (file == null || file.isEmpty()) {
throw new IllegalArgumentException("File cannot be null or empty");
}
SysOssVo uploadDto = null;
String fileName = file.getOriginalFilename(); String fileName = file.getOriginalFilename();
List<String> chunkList = new ArrayList<>(); List<String> chunkList = new ArrayList<>();
KnowledgeAttach knowledgeAttach = new KnowledgeAttach(); KnowledgeAttach knowledgeAttach = new KnowledgeAttach();
@@ -201,13 +222,17 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
knowledgeAttach.setDocName(fileName); knowledgeAttach.setDocName(fileName);
knowledgeAttach.setDocType(fileName.substring(fileName.lastIndexOf(".") + 1)); knowledgeAttach.setDocType(fileName.substring(fileName.lastIndexOf(".") + 1));
String content = ""; String content = "";
ResourceLoader resourceLoader = resourceLoaderFactory.getLoaderByFileType(knowledgeAttach.getDocType()); ResourceLoader resourceLoader = resourceLoaderFactory.getLoaderByFileType(
knowledgeAttach.getDocType());
List<String> fids = new ArrayList<>(); List<String> fids = new ArrayList<>();
try { try {
content = resourceLoader.getContent(file.getInputStream()); content = resourceLoader.getContent(file.getInputStream());
chunkList = resourceLoader.getChunkList(content, kid); chunkList = resourceLoader.getChunkList(content, kid);
List<KnowledgeFragment> knowledgeFragmentList = new ArrayList<>(); List<KnowledgeFragment> knowledgeFragmentList = new ArrayList<>();
if (CollUtil.isNotEmpty(chunkList)) { if (CollUtil.isNotEmpty(chunkList)) {
// Upload file to OSS
uploadDto = ossService.upload(file);
for (int i = 0; i < chunkList.size(); i++) { for (int i = 0; i < chunkList.size(); i++) {
String fid = RandomUtil.randomString(10); String fid = RandomUtil.randomString(10);
fids.add(fid); fids.add(fid);
@@ -227,24 +252,22 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
} }
knowledgeAttach.setContent(content); knowledgeAttach.setContent(content);
knowledgeAttach.setCreateTime(new Date()); knowledgeAttach.setCreateTime(new Date());
if (ObjectUtil.isNotEmpty(uploadDto) && ObjectUtil.isNotEmpty(uploadDto.getOssId())) {
knowledgeAttach.setOssId(uploadDto.getOssId());
//只有pdf文件 才需要拆解图片和分析图片内容
if (FileType.PDF.equals(knowledgeAttach.getDocType())) {
knowledgeAttach.setPicStatus(DealStatus.STATUS_10);
knowledgeAttach.setPicAnysStatus(DealStatus.STATUS_10);
} else {
knowledgeAttach.setPicStatus(DealStatus.STATUS_30);
knowledgeAttach.setPicAnysStatus(DealStatus.STATUS_30);
}
//所有文件上传后,都需要同步到向量数据库
knowledgeAttach.setVectorStatus(DealStatus.STATUS_10);
}
attachMapper.insert(knowledgeAttach); attachMapper.insert(knowledgeAttach);
// 通过kid查询知识库信息
KnowledgeInfoVo knowledgeInfoVo = baseMapper.selectVoOne(Wrappers.<KnowledgeInfo>lambdaQuery()
.eq(KnowledgeInfo::getKid, kid));
// 通过向量模型查询模型信息
ChatModelVo chatModelVo = chatModelService.selectModelByName(knowledgeInfoVo.getVectorModel());
StoreEmbeddingBo storeEmbeddingBo = new StoreEmbeddingBo();
storeEmbeddingBo.setKid(kid);
storeEmbeddingBo.setDocId(docId);
storeEmbeddingBo.setFids(fids);
storeEmbeddingBo.setChunkList(chunkList);
storeEmbeddingBo.setModelName(knowledgeInfoVo.getVectorModel());
storeEmbeddingBo.setApiKey(chatModelVo.getApiKey());
storeEmbeddingBo.setBaseUrl(chatModelVo.getApiHost());
vectorStoreService.storeEmbeddings(storeEmbeddingBo);
} }
@@ -262,4 +285,94 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
} }
} }
/**
 * Scheduled sweep that sends attachments to the vector store once their picture
 * stages are finished.
 *
 * <p>Selects every attachment with {@code picStatus == 30}, {@code picAnysStatus == 30}
 * and {@code vectorStatus == 10}, and passes each one to {@link #dealVectorStatus}.
 *
 * <p>A failure on one attachment does not stop the sweep. The remaining attachments
 * are still processed, and the first failure is rethrown afterwards with any later
 * failures attached as suppressed exceptions. Without this, a single attachment that
 * always fails would block every attachment after it on each run.
 *
 * <p>NOTE(review): {@code dealVectorStatus} is invoked directly on {@code this}.
 * With Spring's proxy-based {@code @Async}, self-invocation presumably bypasses the
 * proxy, so the items run sequentially on the scheduler thread. Confirm whether
 * that is intended.
 *
 * @throws Exception the first failure raised while processing an attachment
 */
@Scheduled(fixedDelay = 3000) // 3000 ms fixed delay between runs
public void dealKnowledgeAttach() throws Exception {
    // Attachments that are ready to be written to the vector store.
    List<KnowledgeAttach> pendingAttaches = attachMapper.selectList(
        new LambdaQueryWrapper<KnowledgeAttach>()
            .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
            .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
            .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
    );
    if (ObjectUtil.isEmpty(pendingAttaches)) {
        return;
    }

    Exception firstFailure = null;
    for (KnowledgeAttach attachItem : pendingAttaches) {
        try {
            this.dealVectorStatus(attachItem);
        } catch (Exception e) {
            // Remember the failure but keep going so one bad item cannot starve the rest.
            if (firstFailure == null) {
                firstFailure = e;
            } else {
                firstFailure.addSuppressed(e);
            }
        }
    }
    if (firstFailure != null) {
        throw firstFailure;
    }
}
/**
 * Writes the stored fragments of one attachment into the vector store and records
 * progress in {@code vectorStatus}.
 *
 * <p>Flow:
 * <ol>
 *   <li>Claim the row with a conditional update that moves {@code vectorStatus}
 *       from 10 to 20. If no row is affected, the method returns without doing
 *       anything.</li>
 *   <li>Load the knowledge base by {@code kid}, then the model named by its
 *       {@code vectorModel}.</li>
 *   <li>Load the fragments matching {@code kid} and {@code docId}, and pass their
 *       fids and contents to {@code vectorStoreService.storeEmbeddings}.</li>
 *   <li>Move {@code vectorStatus} from 20 to 30.</li>
 * </ol>
 *
 * <p>If anything fails after the row was claimed, {@code vectorStatus} is moved back
 * from 20 to 10 so that the scheduled sweep selects the row again. The reset is
 * skipped when this invocation never held the claim, so it cannot undo a claim made
 * elsewhere. A failure of the reset itself is attached to the original exception as
 * suppressed instead of replacing it.
 *
 * <p>NOTE(review): {@code @Async} presumably only takes effect when this method is
 * called through the Spring proxy, not via {@code this.dealVectorStatus(...)}. Confirm.
 *
 * @param attachItem attachment row to process; its id, kid and docId are read
 * @throws Exception declared by the signature; every failure raised inside is
 *                   rethrown as a {@link RuntimeException} wrapping the cause
 */
@Async
public void dealVectorStatus(KnowledgeAttach attachItem) throws Exception {
    // Becomes true only after this invocation has moved the row to "in progress".
    boolean claimed = false;
    try {
        // Claim the row: the status conditions make the update a compare-and-set.
        int affectedRows = attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
            .set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_20)
            .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
            .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
            .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
            .eq(KnowledgeAttach::getId, attachItem.getId())
        );
        if (affectedRows == 0) {
            return;
        }
        claimed = true;

        // Look up the knowledge base by kid.
        KnowledgeInfoVo knowledgeInfoVo = baseMapper.selectVoOne(Wrappers.<KnowledgeInfo>lambdaQuery()
            .eq(KnowledgeInfo::getKid, attachItem.getKid()));
        if (knowledgeInfoVo == null) {
            throw new IllegalStateException(
                "Knowledge base not found for kid: " + attachItem.getKid());
        }

        // Look up the model configured as the knowledge base's vector model.
        ChatModelVo chatModelVo = chatModelService.selectModelByName(
            knowledgeInfoVo.getVectorModel());
        if (chatModelVo == null) {
            throw new IllegalStateException(
                "Vector model not found: " + knowledgeInfoVo.getVectorModel());
        }

        List<KnowledgeFragment> knowledgeFragments = fragmentMapper.selectList(
            new LambdaQueryWrapper<KnowledgeFragment>()
                .eq(KnowledgeFragment::getKid, attachItem.getKid())
                .eq(KnowledgeFragment::getDocId, attachItem.getDocId())
        );
        if (ObjectUtil.isEmpty(knowledgeFragments)) {
            throw new IllegalStateException("文件段落为空");
        }

        // Both lists have one entry per fragment, so they are non-empty here;
        // no separate emptiness checks are needed.
        List<String> fids = knowledgeFragments.stream()
            .map(KnowledgeFragment::getFid)
            .collect(Collectors.toList());
        List<String> chunkList = knowledgeFragments.stream()
            .map(KnowledgeFragment::getContent)
            .collect(Collectors.toList());

        StoreEmbeddingBo storeEmbeddingBo = new StoreEmbeddingBo();
        storeEmbeddingBo.setKid(attachItem.getKid());
        storeEmbeddingBo.setDocId(attachItem.getDocId());
        storeEmbeddingBo.setFids(fids);
        storeEmbeddingBo.setChunkList(chunkList);
        storeEmbeddingBo.setModelName(knowledgeInfoVo.getVectorModel());
        storeEmbeddingBo.setApiKey(chatModelVo.getApiKey());
        storeEmbeddingBo.setBaseUrl(chatModelVo.getApiHost());
        vectorStoreService.storeEmbeddings(storeEmbeddingBo);

        // Mark the row as finished.
        attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
            .set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_30)
            .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
            .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
            .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_20)
            .eq(KnowledgeAttach::getId, attachItem.getId()));
    } catch (Exception e) {
        if (claimed) {
            try {
                // Release the claim so the row becomes eligible for another attempt.
                attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
                    .set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
                    .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
                    .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
                    .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_20)
                    .eq(KnowledgeAttach::getId, attachItem.getId()));
            } catch (RuntimeException resetFailure) {
                // Keep the original failure as the primary cause.
                e.addSuppressed(resetFailure);
            }
        }
        throw new RuntimeException(e);
    }
}
} }

View File

@@ -0,0 +1,6 @@
-- Adds three processing-status columns to knowledge_attach.
-- Each column holds a status code: 10 = not started, 20 = in progress, 30 = finished.
--
-- The columns are declared as plain TINYINT rather than TINYINT(1). The values are
-- numeric codes, not booleans, and MySQL Connector/J by default treats TINYINT(1)
-- as a BIT/boolean type (tinyInt1isBit), which can misreport these columns.
--
-- NOTE(review): the new columns are placed AFTER `oss_id`, so that column must
-- already exist when this script runs. Confirm it is created by an earlier migration.
-- NOTE(review): the DROP/ADD PRIMARY KEY pair re-declares the primary key on `id`.
-- It looks like tool-generated output that changes nothing if `id` is already the
-- primary key. Confirm before running on a large table.
ALTER TABLE `knowledge_attach`
ADD COLUMN `pic_status` tinyint NOT NULL DEFAULT 10 COMMENT '拆解图片状态10未开始20进行中30已完成' AFTER `oss_id`,
ADD COLUMN `pic_anys_status` tinyint NOT NULL DEFAULT 10 COMMENT '分析图片状态10未开始20进行中30已完成' AFTER `pic_status`,
ADD COLUMN `vector_status` tinyint NOT NULL DEFAULT 10 COMMENT '写入向量数据库状态10未开始20进行中30已完成' AFTER `pic_anys_status`,
DROP PRIMARY KEY,
ADD PRIMARY KEY (`id`) USING BTREE;