mirror of
https://gitcode.com/ageerle/ruoyi-ai.git
synced 2026-04-17 05:43:39 +00:00
pdf文件解析成异步处理
This commit is contained in:
@@ -3,6 +3,7 @@ package org.ruoyi;
|
||||
import org.springframework.boot.SpringApplication;
|
||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
import org.springframework.boot.context.metrics.buffering.BufferingApplicationStartup;
|
||||
import org.springframework.scheduling.annotation.EnableScheduling;
|
||||
|
||||
/**
|
||||
* 启动程序
|
||||
@@ -10,6 +11,7 @@ import org.springframework.boot.context.metrics.buffering.BufferingApplicationSt
|
||||
* @author Lion Li
|
||||
*/
|
||||
@SpringBootApplication
|
||||
@EnableScheduling
|
||||
public class RuoYiAIApplication {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
package org.ruoyi.constant;
|
||||
|
||||
/**
 * Status codes for a staged processing step: 10 = not started, 20 = in progress, 30 = finished.
 *
 * <p>The constants are boxed {@code Integer}s; compare them with {@code equals} rather than
 * {@code ==}. This class only holds constants and is not meant to be instantiated or extended.
 *
 * @Date: 2025/5/14 2:04 PM
 */
public final class DealStatus {

    /** Not started. */
    public static final Integer STATUS_10 = 10;

    /** In progress. */
    public static final Integer STATUS_20 = 20;

    /** Finished. */
    public static final Integer STATUS_30 = 30;

    /** Constants holder; prevents instantiation. */
    private DealStatus() {
        throw new UnsupportedOperationException("DealStatus is a constants holder");
    }
}
|
||||
@@ -58,4 +58,26 @@ public class KnowledgeAttach extends BaseEntity {
|
||||
private String remark;
|
||||
|
||||
|
||||
/**
|
||||
* 对象存储主键
|
||||
*/
|
||||
private Long ossId;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* 拆解图片状态10未开始,20进行中,30已完成
|
||||
*/
|
||||
private Integer picStatus;
|
||||
|
||||
/**
|
||||
* 分析图片状态10未开始,20进行中,30已完成
|
||||
*/
|
||||
private Integer picAnysStatus;
|
||||
|
||||
/**
|
||||
* 写入向量数据库状态10未开始,20进行中,30已完成
|
||||
*/
|
||||
private Integer vectorStatus;
|
||||
|
||||
}
|
||||
|
||||
@@ -62,5 +62,30 @@ public class KnowledgeAttachBo extends BaseEntity {
|
||||
@NotBlank(message = "备注不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private String remark;
|
||||
|
||||
/**
|
||||
* 对象存储主键
|
||||
*/
|
||||
@NotNull(message = "对象存储主键不能为空", groups = {AddGroup.class, EditGroup.class})
|
||||
private Long ossId;
|
||||
|
||||
|
||||
/**
|
||||
* 拆解图片状态10未开始,20进行中,30已完成
|
||||
*/
|
||||
@NotNull(message = "拆解图片状态10未开始,20进行中,30已完成不能为空", groups = { AddGroup.class, EditGroup.class })
|
||||
private Integer picStatus;
|
||||
|
||||
/**
|
||||
* 分析图片状态10未开始,20进行中,30已完成
|
||||
*/
|
||||
@NotNull(message = "分析图片状态10未开始,20进行中,30已完成不能为空", groups = { AddGroup.class, EditGroup.class })
|
||||
private Integer picAnysStatus;
|
||||
|
||||
/**
|
||||
* 写入向量数据库状态10未开始,20进行中,30已完成
|
||||
*/
|
||||
@NotNull(message = "写入向量数据库状态10未开始,20进行中,30已完成不能为空", groups = { AddGroup.class, EditGroup.class })
|
||||
private Integer vectorStatus;
|
||||
|
||||
|
||||
}
|
||||
|
||||
@@ -10,8 +10,6 @@ import java.io.Serial;
|
||||
import java.io.Serializable;
|
||||
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* 知识库附件视图对象 knowledge_attach
|
||||
*
|
||||
@@ -68,5 +66,29 @@ public class KnowledgeAttachVo implements Serializable {
|
||||
@ExcelProperty(value = "备注")
|
||||
private String remark;
|
||||
|
||||
/**
|
||||
* 对象存储主键
|
||||
*/
|
||||
@ExcelProperty(value = "对象存储主键")
|
||||
private Long ossId;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* 拆解图片状态10未开始,20进行中,30已完成
|
||||
*/
|
||||
@ExcelProperty(value = "拆解图片状态10未开始,20进行中,30已完成")
|
||||
private Integer picStatus;
|
||||
|
||||
/**
|
||||
* 分析图片状态10未开始,20进行中,30已完成
|
||||
*/
|
||||
@ExcelProperty(value = "分析图片状态10未开始,20进行中,30已完成")
|
||||
private Integer picAnysStatus;
|
||||
|
||||
/**
|
||||
* 写入向量数据库状态10未开始,20进行中,30已完成
|
||||
*/
|
||||
@ExcelProperty(value = "写入向量数据库状态10未开始,20进行中,30已完成")
|
||||
private Integer vectorStatus;
|
||||
}
|
||||
|
||||
@@ -61,5 +61,5 @@ public interface IKnowledgeInfoService {
|
||||
/**
|
||||
* 上传附件
|
||||
*/
|
||||
void upload(KnowledgeInfoUploadBo bo);
|
||||
void upload(KnowledgeInfoUploadBo bo) throws Exception;
|
||||
}
|
||||
|
||||
@@ -118,7 +118,7 @@ public class KnowledgeController extends BaseController {
|
||||
* 上传知识库附件
|
||||
*/
|
||||
@PostMapping(value = "/attach/upload")
|
||||
public R<String> upload(KnowledgeInfoUploadBo bo) {
|
||||
public R<String> upload(KnowledgeInfoUploadBo bo) throws Exception {
|
||||
knowledgeInfoService.upload(bo);
|
||||
return R.ok("上传知识库附件成功!");
|
||||
}
|
||||
|
||||
@@ -1,11 +1,14 @@
|
||||
package org.ruoyi.chat.service.knowledge;
|
||||
|
||||
import cn.hutool.core.collection.CollUtil;
|
||||
import cn.hutool.core.util.ObjectUtil;
|
||||
import cn.hutool.core.util.RandomUtil;
|
||||
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
|
||||
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
|
||||
import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper;
|
||||
import com.baomidou.mybatisplus.core.toolkit.Wrappers;
|
||||
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.ruoyi.chain.loader.ResourceLoader;
|
||||
import org.ruoyi.chain.loader.ResourceLoaderFactory;
|
||||
@@ -13,6 +16,8 @@ import org.ruoyi.common.core.domain.model.LoginUser;
|
||||
import org.ruoyi.common.core.utils.MapstructUtils;
|
||||
import org.ruoyi.common.core.utils.StringUtils;
|
||||
import org.ruoyi.common.satoken.utils.LoginHelper;
|
||||
import org.ruoyi.constant.DealStatus;
|
||||
import org.ruoyi.constant.FileType;
|
||||
import org.ruoyi.core.page.PageQuery;
|
||||
import org.ruoyi.core.page.TableDataInfo;
|
||||
import org.ruoyi.domain.ChatModel;
|
||||
@@ -30,11 +35,15 @@ import org.ruoyi.mapper.KnowledgeInfoMapper;
|
||||
import org.ruoyi.service.IChatModelService;
|
||||
import org.ruoyi.service.VectorStoreService;
|
||||
import org.ruoyi.service.IKnowledgeInfoService;
|
||||
import org.ruoyi.system.domain.vo.SysOssVo;
|
||||
import org.ruoyi.system.service.ISysOssService;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.scheduling.annotation.Scheduled;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import org.springframework.web.multipart.MultipartFile;
|
||||
import org.springframework.scheduling.annotation.Async;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
@@ -62,6 +71,8 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
|
||||
private final IChatModelService chatModelService;
|
||||
|
||||
private final ISysOssService ossService;
|
||||
|
||||
/**
|
||||
* 查询知识库
|
||||
*/
|
||||
@@ -96,14 +107,18 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
lqw.eq(bo.getUid() != null, KnowledgeInfo::getUid, bo.getUid());
|
||||
lqw.like(StringUtils.isNotBlank(bo.getKname()), KnowledgeInfo::getKname, bo.getKname());
|
||||
lqw.eq(bo.getShare() != null, KnowledgeInfo::getShare, bo.getShare());
|
||||
lqw.eq(StringUtils.isNotBlank(bo.getDescription()), KnowledgeInfo::getDescription, bo.getDescription());
|
||||
lqw.eq(StringUtils.isNotBlank(bo.getKnowledgeSeparator()), KnowledgeInfo::getKnowledgeSeparator, bo.getKnowledgeSeparator());
|
||||
lqw.eq(StringUtils.isNotBlank(bo.getQuestionSeparator()), KnowledgeInfo::getQuestionSeparator, bo.getQuestionSeparator());
|
||||
lqw.eq(StringUtils.isNotBlank(bo.getDescription()), KnowledgeInfo::getDescription,
|
||||
bo.getDescription());
|
||||
lqw.eq(StringUtils.isNotBlank(bo.getKnowledgeSeparator()), KnowledgeInfo::getKnowledgeSeparator,
|
||||
bo.getKnowledgeSeparator());
|
||||
lqw.eq(StringUtils.isNotBlank(bo.getQuestionSeparator()), KnowledgeInfo::getQuestionSeparator,
|
||||
bo.getQuestionSeparator());
|
||||
lqw.eq(bo.getOverlapChar() != null, KnowledgeInfo::getOverlapChar, bo.getOverlapChar());
|
||||
lqw.eq(bo.getRetrieveLimit() != null, KnowledgeInfo::getRetrieveLimit, bo.getRetrieveLimit());
|
||||
lqw.eq(bo.getTextBlockSize() != null, KnowledgeInfo::getTextBlockSize, bo.getTextBlockSize());
|
||||
lqw.eq(StringUtils.isNotBlank(bo.getVector()), KnowledgeInfo::getVector, bo.getVector());
|
||||
lqw.eq(StringUtils.isNotBlank(bo.getVectorModel()), KnowledgeInfo::getVectorModel, bo.getVectorModel());
|
||||
lqw.eq(StringUtils.isNotBlank(bo.getVectorModel()), KnowledgeInfo::getVectorModel,
|
||||
bo.getVectorModel());
|
||||
return lqw;
|
||||
}
|
||||
|
||||
@@ -192,6 +207,12 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
}
|
||||
|
||||
public void storeContent(MultipartFile file, String kid) {
|
||||
if (file == null || file.isEmpty()) {
|
||||
throw new IllegalArgumentException("File cannot be null or empty");
|
||||
}
|
||||
|
||||
SysOssVo uploadDto = null;
|
||||
|
||||
String fileName = file.getOriginalFilename();
|
||||
List<String> chunkList = new ArrayList<>();
|
||||
KnowledgeAttach knowledgeAttach = new KnowledgeAttach();
|
||||
@@ -201,13 +222,17 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
knowledgeAttach.setDocName(fileName);
|
||||
knowledgeAttach.setDocType(fileName.substring(fileName.lastIndexOf(".") + 1));
|
||||
String content = "";
|
||||
ResourceLoader resourceLoader = resourceLoaderFactory.getLoaderByFileType(knowledgeAttach.getDocType());
|
||||
ResourceLoader resourceLoader = resourceLoaderFactory.getLoaderByFileType(
|
||||
knowledgeAttach.getDocType());
|
||||
List<String> fids = new ArrayList<>();
|
||||
try {
|
||||
content = resourceLoader.getContent(file.getInputStream());
|
||||
chunkList = resourceLoader.getChunkList(content, kid);
|
||||
List<KnowledgeFragment> knowledgeFragmentList = new ArrayList<>();
|
||||
if (CollUtil.isNotEmpty(chunkList)) {
|
||||
// Upload file to OSS
|
||||
uploadDto = ossService.upload(file);
|
||||
|
||||
for (int i = 0; i < chunkList.size(); i++) {
|
||||
String fid = RandomUtil.randomString(10);
|
||||
fids.add(fid);
|
||||
@@ -227,24 +252,22 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
}
|
||||
knowledgeAttach.setContent(content);
|
||||
knowledgeAttach.setCreateTime(new Date());
|
||||
|
||||
if (ObjectUtil.isNotEmpty(uploadDto) && ObjectUtil.isNotEmpty(uploadDto.getOssId())) {
|
||||
knowledgeAttach.setOssId(uploadDto.getOssId());
|
||||
//只有pdf文件 才需要拆解图片和分析图片内容
|
||||
if (FileType.PDF.equals(knowledgeAttach.getDocType())) {
|
||||
knowledgeAttach.setPicStatus(DealStatus.STATUS_10);
|
||||
knowledgeAttach.setPicAnysStatus(DealStatus.STATUS_10);
|
||||
} else {
|
||||
knowledgeAttach.setPicStatus(DealStatus.STATUS_30);
|
||||
knowledgeAttach.setPicAnysStatus(DealStatus.STATUS_30);
|
||||
}
|
||||
//所有文件上传后,都需要同步到向量数据库
|
||||
knowledgeAttach.setVectorStatus(DealStatus.STATUS_10);
|
||||
}
|
||||
|
||||
attachMapper.insert(knowledgeAttach);
|
||||
|
||||
// 通过kid查询知识库信息
|
||||
KnowledgeInfoVo knowledgeInfoVo = baseMapper.selectVoOne(Wrappers.<KnowledgeInfo>lambdaQuery()
|
||||
.eq(KnowledgeInfo::getKid, kid));
|
||||
|
||||
// 通过向量模型查询模型信息
|
||||
ChatModelVo chatModelVo = chatModelService.selectModelByName(knowledgeInfoVo.getVectorModel());
|
||||
|
||||
StoreEmbeddingBo storeEmbeddingBo = new StoreEmbeddingBo();
|
||||
storeEmbeddingBo.setKid(kid);
|
||||
storeEmbeddingBo.setDocId(docId);
|
||||
storeEmbeddingBo.setFids(fids);
|
||||
storeEmbeddingBo.setChunkList(chunkList);
|
||||
storeEmbeddingBo.setModelName(knowledgeInfoVo.getVectorModel());
|
||||
storeEmbeddingBo.setApiKey(chatModelVo.getApiKey());
|
||||
storeEmbeddingBo.setBaseUrl(chatModelVo.getApiHost());
|
||||
vectorStoreService.storeEmbeddings(storeEmbeddingBo);
|
||||
}
|
||||
|
||||
|
||||
@@ -262,4 +285,94 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* 定时 处理 附件上传后上传向量数据库和PDF文件图片拆解和分析内容
|
||||
*/
|
||||
@Scheduled(fixedDelay = 3000) // 每3秒执行一次
|
||||
public void dealKnowledgeAttach() throws Exception {
|
||||
//处理 需要上传向量数据库的记录
|
||||
List<KnowledgeAttach> knowledgeAttaches = attachMapper.selectList(
|
||||
new LambdaQueryWrapper<KnowledgeAttach>()
|
||||
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
|
||||
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
|
||||
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
|
||||
);
|
||||
if (ObjectUtil.isNotEmpty(knowledgeAttaches)) {
|
||||
for (KnowledgeAttach attachItem : knowledgeAttaches) {
|
||||
this.dealVectorStatus(attachItem);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Async
|
||||
public void dealVectorStatus(KnowledgeAttach attachItem) throws Exception {
|
||||
try {
|
||||
//锁定数据 更改VectorStatus 到进行中
|
||||
if (attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
|
||||
.set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_20)
|
||||
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
|
||||
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
|
||||
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttach::getId, attachItem.getId())
|
||||
) == 0) {
|
||||
return;
|
||||
}
|
||||
// 通过kid查询知识库信息
|
||||
KnowledgeInfoVo knowledgeInfoVo = baseMapper.selectVoOne(Wrappers.<KnowledgeInfo>lambdaQuery()
|
||||
.eq(KnowledgeInfo::getKid, attachItem.getKid()));
|
||||
|
||||
// 通过向量模型查询模型信息
|
||||
ChatModelVo chatModelVo = chatModelService.selectModelByName(
|
||||
knowledgeInfoVo.getVectorModel());
|
||||
|
||||
List<KnowledgeFragment> knowledgeFragments = fragmentMapper.selectList(
|
||||
new LambdaQueryWrapper<KnowledgeFragment>()
|
||||
.eq(KnowledgeFragment::getKid, attachItem.getKid())
|
||||
.eq(KnowledgeFragment::getDocId, attachItem.getDocId())
|
||||
);
|
||||
if (ObjectUtil.isEmpty(knowledgeFragments)) {
|
||||
throw new Exception("文件段落为空");
|
||||
}
|
||||
List<String> fids = knowledgeFragments.stream()
|
||||
.map(KnowledgeFragment::getFid)
|
||||
.collect(Collectors.toList());
|
||||
if (ObjectUtil.isEmpty(fids)) {
|
||||
throw new Exception("fids 为空");
|
||||
}
|
||||
List<String> chunkList = knowledgeFragments.stream()
|
||||
.map(KnowledgeFragment::getContent)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
if (ObjectUtil.isEmpty(chunkList)) {
|
||||
throw new Exception("chunkList 为空");
|
||||
}
|
||||
StoreEmbeddingBo storeEmbeddingBo = new StoreEmbeddingBo();
|
||||
storeEmbeddingBo.setKid(attachItem.getKid());
|
||||
storeEmbeddingBo.setDocId(attachItem.getDocId());
|
||||
storeEmbeddingBo.setFids(fids);
|
||||
storeEmbeddingBo.setChunkList(chunkList);
|
||||
storeEmbeddingBo.setModelName(knowledgeInfoVo.getVectorModel());
|
||||
storeEmbeddingBo.setApiKey(chatModelVo.getApiKey());
|
||||
storeEmbeddingBo.setBaseUrl(chatModelVo.getApiHost());
|
||||
vectorStoreService.storeEmbeddings(storeEmbeddingBo);
|
||||
|
||||
//设置处理完成
|
||||
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
|
||||
.set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_30)
|
||||
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
|
||||
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
|
||||
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_20)
|
||||
.eq(KnowledgeAttach::getId, attachItem.getId()));
|
||||
} catch (Exception e) {
|
||||
//设置处理失败
|
||||
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
|
||||
.set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
|
||||
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
|
||||
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
|
||||
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_20)
|
||||
.eq(KnowledgeAttach::getId, attachItem.getId()));
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
6
script/sql/update/202505141010.sql
Normal file
6
script/sql/update/202505141010.sql
Normal file
@@ -0,0 +1,6 @@
|
||||
-- Adds the three per-step processing status columns to knowledge_attach.
-- Each column stores 10 / 20 / 30, so plain TINYINT is used: TINYINT(1) is the boolean-style
-- declaration and may be surfaced as a bit/boolean by JDBC drivers.
-- NOTE(review): the columns are positioned AFTER `oss_id`, which this script does not create;
-- confirm that `oss_id` already exists on the target schema before running.
ALTER TABLE `knowledge_attach`
    ADD COLUMN `pic_status` tinyint NOT NULL DEFAULT 10 COMMENT '拆解图片状态10未开始,20进行中,30已完成' AFTER `oss_id`,
    ADD COLUMN `pic_anys_status` tinyint NOT NULL DEFAULT 10 COMMENT '分析图片状态10未开始,20进行中,30已完成' AFTER `pic_status`,
    ADD COLUMN `vector_status` tinyint NOT NULL DEFAULT 10 COMMENT '写入向量数据库状态10未开始,20进行中,30已完成' AFTER `pic_anys_status`,
    DROP PRIMARY KEY,
    ADD PRIMARY KEY (`id`) USING BTREE;
|
||||
Reference in New Issue
Block a user