feat:pdf文件解析图片和分析图片,上传向量数据库都修改成异步处理

This commit is contained in:
zhouweiyi
2025-05-15 17:46:38 +08:00
parent 584212c569
commit c6ffbcb3cf
24 changed files with 1585 additions and 414 deletions

View File

@@ -3,6 +3,7 @@ package org.ruoyi;
import org.springframework.boot.SpringApplication; import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication; import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.context.metrics.buffering.BufferingApplicationStartup; import org.springframework.boot.context.metrics.buffering.BufferingApplicationStartup;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.annotation.EnableScheduling; import org.springframework.scheduling.annotation.EnableScheduling;
/** /**
@@ -12,6 +13,7 @@ import org.springframework.scheduling.annotation.EnableScheduling;
*/ */
@SpringBootApplication @SpringBootApplication
@EnableScheduling @EnableScheduling
@EnableAsync
public class RuoYiAIApplication { public class RuoYiAIApplication {
public static void main(String[] args) { public static void main(String[] args) {

View File

@@ -114,6 +114,10 @@
<artifactId>commons-io</artifactId> <artifactId>commons-io</artifactId>
<version>2.17.0</version> <version>2.17.0</version>
</dependency> </dependency>
<dependency>
<groupId>org.ruoyi</groupId>
<artifactId>ruoyi-system-api</artifactId>
</dependency>
</dependencies> </dependencies>

View File

@@ -11,6 +11,8 @@ public class DealStatus {
public static final Integer STATUS_20 = 20; public static final Integer STATUS_20 = 20;
//已结束 //已结束
public static final Integer STATUS_30 = 30; public static final Integer STATUS_30 = 30;
//处理失败
public static final Integer STATUS_40 = 40;
} }

View File

@@ -0,0 +1,81 @@
package org.ruoyi.domain;
import com.baomidou.mybatisplus.annotation.*;
import lombok.Data;
import lombok.EqualsAndHashCode;
import java.io.Serial;
import org.ruoyi.core.domain.BaseEntity;
/**
 * Persistent entity mapped to the {@code knowledge_attach_pic} table
 * (image records that belong to knowledge-base attachments).
 *
 * @author Albert
 * @date 2025-05-15
 */
@Data
@EqualsAndHashCode(callSuper = true)
@TableName("knowledge_attach_pic")
public class KnowledgeAttachPic extends BaseEntity {
@Serial
private static final long serialVersionUID = 1L;
/**
 * Primary key.
 */
@TableId(value = "id")
private Long id;
/**
 * Knowledge base id.
 */
private String kid;
/**
 * Attachment id.
 */
private String aid;
/**
 * Document name.
 */
private String docName;
/**
 * Document type.
 */
private String docType;
/**
 * Document content.
 */
private String content;
/**
 * Page number the image is located on.
 */
private Integer pageNum;
/**
 * Index of the image within its page.
 */
private Integer indexNum;
/**
 * Image analysis status: 10 = not started, 20 = in progress, 30 = finished.
 * NOTE(review): DealStatus also defines 40 (processing failed) - confirm whether
 * that value can be stored here as well.
 */
private Integer picAnysStatus;
/**
 * Primary key of the object-storage (OSS) record.
 */
private Long ossId;
/**
 * Remark.
 */
private String remark;
}

View File

@@ -0,0 +1,90 @@
package org.ruoyi.domain.bo;
import org.ruoyi.common.core.validate.AddGroup;
import org.ruoyi.common.core.validate.EditGroup;
import org.ruoyi.core.domain.BaseEntity;
import io.github.linpeilie.annotations.AutoMapper;
import lombok.Data;
import lombok.EqualsAndHashCode;
import jakarta.validation.constraints.*;
import org.ruoyi.domain.KnowledgeAttachPic;
/**
 * Business object (request/command model) for {@code knowledge_attach_pic}.
 * Auto-mapped to {@link KnowledgeAttachPic}; the reverse conversion is not generated.
 * Validation constraints are bound to the {@code AddGroup} / {@code EditGroup} groups.
 *
 * @author Albert
 * @date 2025-05-15
 */
@Data
@EqualsAndHashCode(callSuper = true)
@AutoMapper(target = KnowledgeAttachPic.class, reverseConvertGenerate = false)
public class KnowledgeAttachPicBo extends BaseEntity {
/**
 * Primary key. Only required when editing.
 */
@NotNull(message = "主键不能为空", groups = {EditGroup.class})
private Long id;
/**
 * Knowledge base id.
 */
@NotBlank(message = "知识库id不能为空", groups = {AddGroup.class, EditGroup.class})
private String kid;
/**
 * Attachment id.
 */
@NotBlank(message = "附件id不能为空", groups = {AddGroup.class, EditGroup.class})
private String aid;
/**
 * Document name.
 */
@NotBlank(message = "文档名称不能为空", groups = {AddGroup.class, EditGroup.class})
private String docName;
/**
 * Document type.
 */
@NotBlank(message = "文档类型不能为空", groups = {AddGroup.class, EditGroup.class})
private String docType;
/**
 * Document content.
 */
@NotBlank(message = "文档内容不能为空", groups = {AddGroup.class, EditGroup.class})
private String content;
/**
 * Page number the image is located on.
 */
@NotNull(message = "所在页数不能为空", groups = {AddGroup.class, EditGroup.class})
private Integer pageNum;
/**
 * Index of the image within its page.
 */
@NotNull(message = "所在页index不能为空", groups = {AddGroup.class, EditGroup.class})
private Integer indexNum;
/**
 * Image analysis status: 10 = not started, 20 = in progress, 30 = finished.
 */
@NotNull(message = "分析图片状态10未开始20进行中30已完成不能为空", groups = {AddGroup.class,
EditGroup.class})
private Integer picAnysStatus;
/**
 * Primary key of the object-storage (OSS) record.
 */
@NotNull(message = "对象存储主键不能为空", groups = {AddGroup.class, EditGroup.class})
private Long ossId;
/**
 * Remark.
 * NOTE(review): a mandatory remark is unusual - confirm that {@code @NotBlank} is intended here.
 */
@NotBlank(message = "备注不能为空", groups = {AddGroup.class, EditGroup.class})
private String remark;
}

View File

@@ -0,0 +1,92 @@
package org.ruoyi.domain.vo;
import com.alibaba.excel.annotation.ExcelIgnoreUnannotated;
import com.alibaba.excel.annotation.ExcelProperty;
import io.github.linpeilie.annotations.AutoMapper;
import lombok.Data;
import java.io.Serial;
import java.io.Serializable;
import org.ruoyi.domain.KnowledgeAttachPic;
/**
 * View object for {@code knowledge_attach_pic}.
 * Auto-mapped to {@link KnowledgeAttachPic}; because of {@code @ExcelIgnoreUnannotated}
 * only fields carrying {@code @ExcelProperty} take part in Excel export.
 *
 * @author Albert
 * @date 2025-05-15
 */
@Data
@ExcelIgnoreUnannotated
@AutoMapper(target = KnowledgeAttachPic.class)
public class KnowledgeAttachPicVo implements Serializable {
@Serial
private static final long serialVersionUID = 1L;
/**
 * Primary key.
 */
@ExcelProperty(value = "主键")
private Long id;
/**
 * Knowledge base id.
 */
@ExcelProperty(value = "知识库id")
private String kid;
/**
 * Attachment id.
 */
@ExcelProperty(value = "附件id")
private String aid;
/**
 * Document name.
 */
@ExcelProperty(value = "文档名称")
private String docName;
/**
 * Document type.
 */
@ExcelProperty(value = "文档类型")
private String docType;
/**
 * Document content.
 */
@ExcelProperty(value = "文档内容")
private String content;
/**
 * Page number the image is located on.
 */
@ExcelProperty(value = "所在页数")
private Integer pageNum;
/**
 * Index of the image within its page.
 */
@ExcelProperty(value = "所在页index")
private Integer indexNum;
/**
 * Image analysis status: 10 = not started, 20 = in progress, 30 = finished.
 */
@ExcelProperty(value = "分析图片状态10未开始20进行中30已完成")
private Integer picAnysStatus;
/**
 * Primary key of the object-storage (OSS) record.
 */
@ExcelProperty(value = "对象存储主键")
private Long ossId;
/**
 * Remark.
 */
@ExcelProperty(value = "备注")
private String remark;
}

View File

@@ -0,0 +1,15 @@
package org.ruoyi.mapper;
import org.ruoyi.core.mapper.BaseMapperPlus;
import org.ruoyi.domain.KnowledgeAttachPic;
import org.ruoyi.domain.vo.KnowledgeAttachPicVo;
/**
 * Mapper for knowledge-base attachment images.
 * Declares no methods of its own; everything is inherited from {@code BaseMapperPlus},
 * parameterised with the {@link KnowledgeAttachPic} entity and the
 * {@link KnowledgeAttachPicVo} view object.
 *
 * @author Albert
 * @date 2025-05-15
 */
public interface KnowledgeAttachPicMapper extends BaseMapperPlus<KnowledgeAttachPic, KnowledgeAttachPicVo> {
}

View File

@@ -0,0 +1,47 @@
package org.ruoyi.service;
import java.util.Collection;
import java.util.List;
import org.ruoyi.core.page.PageQuery;
import org.ruoyi.core.page.TableDataInfo;
import org.ruoyi.domain.bo.KnowledgeAttachPicBo;
import org.ruoyi.domain.vo.KnowledgeAttachPicVo;
/**
 * Service contract for knowledge-base attachment images ({@code knowledge_attach_pic}).
 *
 * @author Albert
 * @date 2025-05-15
 */
public interface IKnowledgeAttachPicService {
/**
 * Looks up a single attachment image.
 *
 * @param id primary key of the record
 * @return the matching view object
 */
KnowledgeAttachPicVo queryById(Long id);
/**
 * Queries attachment images page by page.
 *
 * @param bo        filter criteria
 * @param pageQuery paging parameters
 * @return one page of matching view objects
 */
TableDataInfo<KnowledgeAttachPicVo> queryPageList(KnowledgeAttachPicBo bo, PageQuery pageQuery);
/**
 * Queries attachment images without paging.
 *
 * @param bo filter criteria
 * @return all matching view objects
 */
List<KnowledgeAttachPicVo> queryList(KnowledgeAttachPicBo bo);
/**
 * Creates a new attachment image record.
 *
 * @param bo data of the record to create
 * @return whether the insert succeeded
 */
Boolean insertByBo(KnowledgeAttachPicBo bo);
/**
 * Updates an existing attachment image record.
 *
 * @param bo data of the record to update
 * @return whether the update succeeded
 */
Boolean updateByBo(KnowledgeAttachPicBo bo);
/**
 * Deletes a batch of attachment image records, optionally validating first.
 *
 * @param ids     primary keys of the records to delete
 * @param isValid flag intended to request business validation before deleting
 * @return whether the delete succeeded
 */
Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid);
}

View File

@@ -0,0 +1,123 @@
package org.ruoyi.service.impl;
import org.ruoyi.common.core.utils.MapstructUtils;
import org.ruoyi.common.core.utils.StringUtils;
import org.ruoyi.core.page.TableDataInfo;
import org.ruoyi.core.page.PageQuery;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import lombok.RequiredArgsConstructor;
import org.ruoyi.domain.KnowledgeAttachPic;
import org.ruoyi.domain.bo.KnowledgeAttachPicBo;
import org.ruoyi.domain.vo.KnowledgeAttachPicVo;
import org.ruoyi.mapper.KnowledgeAttachPicMapper;
import org.ruoyi.service.IKnowledgeAttachPicService;
import org.springframework.stereotype.Service;
import java.util.List;
import java.util.Map;
import java.util.Collection;
/**
 * Service implementation for knowledge-base attachment images
 * ({@code knowledge_attach_pic}). All persistence is delegated to
 * {@link KnowledgeAttachPicMapper}.
 *
 * @author ageerle
 * @date 2025-05-15
 */
@RequiredArgsConstructor
@Service
public class KnowledgeAttachPicServiceImpl implements IKnowledgeAttachPicService {

    private final KnowledgeAttachPicMapper baseMapper;

    /**
     * Looks up a single attachment image by primary key.
     *
     * @param id primary key
     * @return the view object returned by the mapper
     */
    @Override
    public KnowledgeAttachPicVo queryById(Long id) {
        return baseMapper.selectVoById(id);
    }

    /**
     * Queries attachment images page by page.
     *
     * @param bo        filter criteria
     * @param pageQuery paging parameters
     * @return one page of matching view objects
     */
    @Override
    public TableDataInfo<KnowledgeAttachPicVo> queryPageList(KnowledgeAttachPicBo bo,
        PageQuery pageQuery) {
        LambdaQueryWrapper<KnowledgeAttachPic> lqw = buildQueryWrapper(bo);
        Page<KnowledgeAttachPicVo> result = baseMapper.selectVoPage(pageQuery.build(), lqw);
        return TableDataInfo.build(result);
    }

    /**
     * Queries attachment images without paging.
     *
     * @param bo filter criteria
     * @return all matching view objects
     */
    @Override
    public List<KnowledgeAttachPicVo> queryList(KnowledgeAttachPicBo bo) {
        return baseMapper.selectVoList(buildQueryWrapper(bo));
    }

    /**
     * Builds the query wrapper from the filter object. A condition is only added
     * for a field that is set (non-blank for strings, non-null otherwise).
     * The document name is matched with LIKE, every other field with equality.
     */
    private LambdaQueryWrapper<KnowledgeAttachPic> buildQueryWrapper(KnowledgeAttachPicBo bo) {
        LambdaQueryWrapper<KnowledgeAttachPic> lqw = Wrappers.lambdaQuery();
        lqw.eq(StringUtils.isNotBlank(bo.getKid()), KnowledgeAttachPic::getKid, bo.getKid());
        lqw.eq(StringUtils.isNotBlank(bo.getAid()), KnowledgeAttachPic::getAid, bo.getAid());
        lqw.like(StringUtils.isNotBlank(bo.getDocName()), KnowledgeAttachPic::getDocName,
            bo.getDocName());
        lqw.eq(StringUtils.isNotBlank(bo.getDocType()), KnowledgeAttachPic::getDocType,
            bo.getDocType());
        lqw.eq(StringUtils.isNotBlank(bo.getContent()), KnowledgeAttachPic::getContent,
            bo.getContent());
        lqw.eq(bo.getPageNum() != null, KnowledgeAttachPic::getPageNum, bo.getPageNum());
        lqw.eq(bo.getIndexNum() != null, KnowledgeAttachPic::getIndexNum, bo.getIndexNum());
        lqw.eq(bo.getPicAnysStatus() != null, KnowledgeAttachPic::getPicAnysStatus,
            bo.getPicAnysStatus());
        lqw.eq(bo.getOssId() != null, KnowledgeAttachPic::getOssId, bo.getOssId());
        return lqw;
    }

    /**
     * Creates a new attachment image record. On success the generated id is
     * copied back into {@code bo}.
     *
     * @param bo data of the record to create
     * @return {@code true} if a row was inserted
     */
    @Override
    public Boolean insertByBo(KnowledgeAttachPicBo bo) {
        KnowledgeAttachPic add = MapstructUtils.convert(bo, KnowledgeAttachPic.class);
        validEntityBeforeSave(add);
        boolean inserted = baseMapper.insert(add) > 0;
        if (inserted) {
            bo.setId(add.getId());
        }
        return inserted;
    }

    /**
     * Updates an existing attachment image record by primary key.
     *
     * @param bo data of the record to update
     * @return {@code true} if a row was updated
     */
    @Override
    public Boolean updateByBo(KnowledgeAttachPicBo bo) {
        KnowledgeAttachPic update = MapstructUtils.convert(bo, KnowledgeAttachPic.class);
        validEntityBeforeSave(update);
        return baseMapper.updateById(update) > 0;
    }

    /**
     * Validation hook executed before an entity is saved. Currently a no-op.
     */
    private void validEntityBeforeSave(KnowledgeAttachPic entity) {
        //TODO add data validation, e.g. unique constraints
    }

    /**
     * Deletes a batch of attachment image records.
     *
     * @param ids     primary keys to delete; {@code null} or empty deletes nothing
     * @param isValid whether business validation should run first; {@code null} is treated as false
     * @return {@code true} if at least one row was deleted
     */
    @Override
    public Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid) {
        // Nothing to delete: skip the mapper call instead of issuing a delete with an empty id list.
        if (ids == null || ids.isEmpty()) {
            return false;
        }
        // Boolean.TRUE.equals avoids the NullPointerException that unboxing a null Boolean would raise.
        if (Boolean.TRUE.equals(isValid)) {
            //TODO add business validation that decides whether the delete is allowed
        }
        return baseMapper.deleteBatchIds(ids) > 0;
    }
}

View File

@@ -1,5 +1,7 @@
package org.ruoyi.service.impl; package org.ruoyi.service.impl;
import cn.hutool.core.util.ObjectUtil;
import java.util.stream.Collectors;
import org.ruoyi.common.core.utils.MapstructUtils; import org.ruoyi.common.core.utils.MapstructUtils;
import org.ruoyi.common.core.utils.StringUtils; import org.ruoyi.common.core.utils.StringUtils;
import org.ruoyi.core.page.TableDataInfo; import org.ruoyi.core.page.TableDataInfo;
@@ -8,8 +10,11 @@ import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.toolkit.Wrappers; import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import org.ruoyi.domain.KnowledgeAttachPic;
import org.ruoyi.domain.vo.KnowledgeAttachVo; import org.ruoyi.domain.vo.KnowledgeAttachVo;
import org.ruoyi.mapper.KnowledgeAttachPicMapper;
import org.ruoyi.mapper.KnowledgeFragmentMapper; import org.ruoyi.mapper.KnowledgeFragmentMapper;
import org.ruoyi.system.service.ISysOssService;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import org.ruoyi.domain.bo.KnowledgeAttachBo; import org.ruoyi.domain.bo.KnowledgeAttachBo;
@@ -36,11 +41,15 @@ public class KnowledgeAttachServiceImpl implements IKnowledgeAttachService {
private final KnowledgeAttachMapper baseMapper; private final KnowledgeAttachMapper baseMapper;
private final KnowledgeFragmentMapper fragmentMapper; private final KnowledgeFragmentMapper fragmentMapper;
private final ISysOssService ossService;
private final KnowledgeAttachPicMapper picMapper;
/** /**
* 查询知识库附件 * 查询知识库附件
*/ */
@Override @Override
public KnowledgeAttachVo queryById(Long id){ public KnowledgeAttachVo queryById(Long id) {
return baseMapper.selectVoById(id); return baseMapper.selectVoById(id);
} }
@@ -101,7 +110,7 @@ public class KnowledgeAttachServiceImpl implements IKnowledgeAttachService {
/** /**
* 保存前的数据校验 * 保存前的数据校验
*/ */
private void validEntityBeforeSave(KnowledgeAttach entity){ private void validEntityBeforeSave(KnowledgeAttach entity) {
//TODO 做一些数据校验,如唯一约束 //TODO 做一些数据校验,如唯一约束
} }
@@ -110,7 +119,7 @@ public class KnowledgeAttachServiceImpl implements IKnowledgeAttachService {
*/ */
@Override @Override
public Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid) { public Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid) {
if(isValid){ if (isValid) {
//TODO 做一些业务上的校验,判断是否需要校验 //TODO 做一些业务上的校验,判断是否需要校验
} }
return baseMapper.deleteBatchIds(ids) > 0; return baseMapper.deleteBatchIds(ids) > 0;
@@ -118,8 +127,35 @@ public class KnowledgeAttachServiceImpl implements IKnowledgeAttachService {
/*
 * Removes everything stored for the given document id:
 * 1. loads the attachments whose doc_id column equals docId;
 * 2. if any exist, deletes their OSS objects through ossService, then looks up the
 *    KnowledgeAttachPic rows whose kid / aid are among the attachments' kid / id values
 *    and deletes those images' OSS objects and rows;
 * 3. finally deletes the attachment rows and the fragment rows by doc_id.
 */
@Override @Override
public void removeKnowledgeAttach(String docId) { public void removeKnowledgeAttach(String docId) {
Map<String,Object> map = new HashMap<>(); Map<String, Object> map = new HashMap<>();
map.put("doc_id",docId); map.put("doc_id", docId);
List<KnowledgeAttachVo> knowledgeAttachVos = baseMapper.selectVoByMap(map);
if (ObjectUtil.isNotEmpty(knowledgeAttachVos)) {
Collection<Long> ossIds = knowledgeAttachVos.stream()
.map(KnowledgeAttachVo::getOssId)
.collect(Collectors.toList());
// delete the attachments' OSS objects (validation flag passed as false)
ossService.deleteWithValidByIds(ossIds, false);
// delete the OSS objects of the images that belong to these attachments
List<KnowledgeAttachPic> knowledgeAttachPics = picMapper.selectList(
new LambdaQueryWrapper<KnowledgeAttachPic>()
.in(KnowledgeAttachPic::getKid,
knowledgeAttachVos.stream().map(KnowledgeAttachVo::getKid)
.collect(Collectors.toList()))
.in(KnowledgeAttachPic::getAid,
knowledgeAttachVos.stream().map(KnowledgeAttachVo::getId)
.collect(Collectors.toList()))
);
if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) {
Collection<Long> tossIds = knowledgeAttachPics.stream()
.map(KnowledgeAttachPic::getOssId)
.collect(Collectors.toList());
ossService.deleteWithValidByIds(tossIds, false);
List<Long> collect = knowledgeAttachPics.stream().map(KnowledgeAttachPic::getId)
.collect(Collectors.toList());
picMapper.deleteByIds(collect);
}
}
baseMapper.deleteByMap(map); baseMapper.deleteByMap(map);
fragmentMapper.deleteByMap(map); fragmentMapper.deleteByMap(map);
} }

View File

@@ -4,6 +4,8 @@ import java.io.IOException;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import okhttp3.MediaType; import okhttp3.MediaType;
import okhttp3.MultipartBody; import okhttp3.MultipartBody;
@@ -23,16 +25,19 @@ import org.springframework.web.multipart.MultipartFile;
/** /**
* PDF图片提取服务实现类 * PDF图片提取服务实现类
*/ */
@Service //@Service
@Slf4j @Slf4j
public class PdfImageExtractServiceImpl implements PdfImageExtractService { @Data
@AllArgsConstructor
//public class PdfImageExtractServiceImpl implements PdfImageExtractService {
public class PdfImageExtractServiceImpl {
@Value("${pdf.extract.service.url}") // @Value("${pdf.extract.service.url}")
private String serviceUrl; private String serviceUrl;
@Value("${pdf.extract.ai-api.url}") // @Value("${pdf.extract.ai-api.url}")
private String aiApiUrl; private String aiApiUrl;
@Value("${pdf.extract.ai-api.key}") // @Value("${pdf.extract.ai-api.key}")
private String aiApiKey ; private String aiApiKey;
private final OkHttpClient client = new Builder() private final OkHttpClient client = new Builder()
.connectTimeout(100, TimeUnit.SECONDS) .connectTimeout(100, TimeUnit.SECONDS)
@@ -43,7 +48,7 @@ public class PdfImageExtractServiceImpl implements PdfImageExtractService {
private static final MediaType JSON = MediaType.parse("application/json; charset=utf-8"); private static final MediaType JSON = MediaType.parse("application/json; charset=utf-8");
@Override // @Override
public byte[] extractImages(MultipartFile pdfFile, String imageFormat, boolean allowDuplicates) public byte[] extractImages(MultipartFile pdfFile, String imageFormat, boolean allowDuplicates)
throws IOException { throws IOException {
// 构建multipart请求 // 构建multipart请求
@@ -77,7 +82,7 @@ public class PdfImageExtractServiceImpl implements PdfImageExtractService {
* @return 文件内容结果列表 * @return 文件内容结果列表
* @throws IOException 如果API调用过程中发生错误 * @throws IOException 如果API调用过程中发生错误
*/ */
@Override // @Override
public List<PdfFileContentResult> dealFileContent(String[] unzip) throws IOException { public List<PdfFileContentResult> dealFileContent(String[] unzip) throws IOException {
List<PdfFileContentResult> results = new ArrayList<>(); List<PdfFileContentResult> results = new ArrayList<>();
int i = 0; int i = 0;
@@ -110,6 +115,7 @@ public class PdfImageExtractServiceImpl implements PdfImageExtractService {
// 执行请求 // 执行请求
try { try {
log.info("=============call=" + ++i); log.info("=============call=" + ++i);
Response response = client.newCall(request).execute(); Response response = client.newCall(request).execute();
log.info("=============response=" + response); log.info("=============response=" + response);
if (!response.isSuccessful()) { if (!response.isSuccessful()) {
@@ -126,11 +132,10 @@ public class PdfImageExtractServiceImpl implements PdfImageExtractService {
throw new RuntimeException(e); throw new RuntimeException(e);
} }
} }
return results; return results;
} }
@Override // @Override
public List<PdfFileContentResult> extractImages(MultipartFile file) throws IOException { public List<PdfFileContentResult> extractImages(MultipartFile file) throws IOException {
String format = "png"; String format = "png";
boolean allowDuplicates = true; boolean allowDuplicates = true;

View File

@@ -97,7 +97,7 @@ public class VectorStoreServiceImpl implements VectorStoreService {
for (int i = 0; i < chunkList.size(); i++) { for (int i = 0; i < chunkList.size(); i++) {
Map<String, Object> dataSchema = new HashMap<>(); Map<String, Object> dataSchema = new HashMap<>();
dataSchema.put("kid", storeEmbeddingBo.getKid()); dataSchema.put("kid", storeEmbeddingBo.getKid());
dataSchema.put("docId", storeEmbeddingBo.getKid()); dataSchema.put("docId", storeEmbeddingBo.getDocId());
dataSchema.put("fid", storeEmbeddingBo.getFids().get(i)); dataSchema.put("fid", storeEmbeddingBo.getFids().get(i));
Embedding embedding = embeddingModel.embed(chunkList.get(i)).content(); Embedding embedding = embeddingModel.embed(chunkList.get(i)).content();
TextSegment segment = TextSegment.from(chunkList.get(i)); TextSegment segment = TextSegment.from(chunkList.get(i));

View File

@@ -11,6 +11,8 @@ import java.util.Base64;
import java.util.List; import java.util.List;
import java.util.zip.ZipEntry; import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream; import java.util.zip.ZipInputStream;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.web.multipart.MultipartFile;
/** /**
* ZIP文件处理工具类 * ZIP文件处理工具类
@@ -92,4 +94,90 @@ public class ZipUtils {
} }
return base64Contents.toArray(new String[0]); return base64Contents.toArray(new String[0]);
} }
/**
 * Unpacks an in-memory ZIP archive and wraps every non-directory entry in a
 * {@link MultipartFile}.
 *
 * <p>The entry name is used both as the part name and as the original file name;
 * the content type is derived from the file extension.
 *
 * <p>NOTE(review): every entry is fully inflated into memory with no size limit -
 * confirm callers only pass trusted archives. {@code MockMultipartFile} comes from
 * spring-test; consider a dedicated implementation for production use.
 *
 * @param zipData bytes of the ZIP archive; {@code null} or empty yields an empty array
 * @return one {@link MultipartFile} per non-directory entry, in archive order
 * @throws IOException if the archive cannot be read
 */
public static MultipartFile[] unzipToMultipartFiles(byte[] zipData) throws IOException {
    if (zipData == null || zipData.length == 0) {
        return new MultipartFile[0];
    }
    List<MultipartFile> multipartFiles = new ArrayList<>();
    try (ZipInputStream zis = new ZipInputStream(new ByteArrayInputStream(zipData))) {
        ZipEntry zipEntry;
        // getNextEntry() closes the previous entry itself, so no explicit closeEntry() is needed.
        while ((zipEntry = zis.getNextEntry()) != null) {
            if (zipEntry.isDirectory()) {
                continue;
            }
            String fileName = zipEntry.getName();
            // Reads up to the end of the current entry only, not the whole archive.
            byte[] content = zis.readAllBytes();
            multipartFiles.add(new MockMultipartFile(
                fileName,                       // part name
                fileName,                       // original file name
                determineContentType(fileName), // content type
                content                         // file content
            ));
        }
    }
    return multipartFiles.toArray(new MultipartFile[0]);
}
/**
 * Derives a MIME content type from a file name's extension.
 *
 * <p>The extension is compared case-insensitively using a locale-independent
 * lower-casing, so the result does not depend on the JVM default locale.
 * A name without an extension, a name whose only dot is the first character,
 * an unknown extension, or {@code null} all map to
 * {@code application/octet-stream}.
 *
 * @param fileName file name, may be {@code null}
 * @return the content type for the extension
 */
private static String determineContentType(String fileName) {
    if (fileName == null) {
        return "application/octet-stream";
    }
    int dot = fileName.lastIndexOf('.');
    // dot > 0 (not >= 0): a leading dot such as ".png" is not treated as an extension.
    String extension = dot > 0
        ? fileName.substring(dot + 1).toLowerCase(java.util.Locale.ROOT)
        : "";
    return switch (extension) {
        case "txt" -> "text/plain";
        case "html", "htm" -> "text/html";
        case "pdf" -> "application/pdf";
        case "jpg", "jpeg" -> "image/jpeg";
        case "png" -> "image/png";
        case "gif" -> "image/gif";
        case "doc" -> "application/msword";
        // OOXML formats have their own media types, distinct from the legacy binary formats.
        case "docx" -> "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
        case "xls" -> "application/vnd.ms-excel";
        case "xlsx" -> "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
        case "xml" -> "application/xml";
        case "json" -> "application/json";
        default -> "application/octet-stream";
    };
}
} }

View File

@@ -17,5 +17,11 @@
<maven.compiler.target>17</maven.compiler.target> <maven.compiler.target>17</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties> </properties>
<dependencies>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-test</artifactId>
</dependency>
</dependencies>
</project> </project>

View File

@@ -1,5 +1,6 @@
package org.ruoyi.system.service; package org.ruoyi.system.service;
import org.ruoyi.common.log.event.LogininforEvent;
import org.ruoyi.core.page.PageQuery; import org.ruoyi.core.page.PageQuery;
import org.ruoyi.core.page.TableDataInfo; import org.ruoyi.core.page.TableDataInfo;
import org.ruoyi.system.domain.bo.SysLogininforBo; import org.ruoyi.system.domain.bo.SysLogininforBo;
@@ -44,4 +45,6 @@ public interface ISysLogininforService {
* 清空系统登录日志 * 清空系统登录日志
*/ */
void cleanLogininfor(); void cleanLogininfor();
void recordLogininfor(LogininforEvent logininforEvent);
} }

View File

@@ -1,5 +1,6 @@
package org.ruoyi.system.service; package org.ruoyi.system.service;
import org.ruoyi.common.log.event.OperLogEvent;
import org.ruoyi.core.page.PageQuery; import org.ruoyi.core.page.PageQuery;
import org.ruoyi.core.page.TableDataInfo; import org.ruoyi.core.page.TableDataInfo;
import org.ruoyi.system.domain.bo.SysOperLogBo; import org.ruoyi.system.domain.bo.SysOperLogBo;
@@ -51,4 +52,5 @@ public interface ISysOperLogService {
* 清空操作日志 * 清空操作日志
*/ */
void cleanOperLog(); void cleanOperLog();
void recordOper(OperLogEvent operLogEvent);
} }

View File

@@ -28,6 +28,10 @@ public interface ISysOssService {
void download(Long ossId, HttpServletResponse response) throws IOException; void download(Long ossId, HttpServletResponse response) throws IOException;
MultipartFile downloadByFile(Long ossId) throws IOException;
String downloadByByte(Long ossId) throws IOException;
Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid); Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid);
} }

View File

@@ -48,6 +48,7 @@ public class SysLogininforServiceImpl implements ISysLogininforService {
*/ */
@Async @Async
@EventListener @EventListener
@Override
public void recordLogininfor(LogininforEvent logininforEvent) { public void recordLogininfor(LogininforEvent logininforEvent) {
HttpServletRequest request = logininforEvent.getRequest(); HttpServletRequest request = logininforEvent.getRequest();
final UserAgent userAgent = UserAgentUtil.parse(request.getHeader("User-Agent")); final UserAgent userAgent = UserAgentUtil.parse(request.getHeader("User-Agent"));

View File

@@ -42,6 +42,7 @@ public class SysOperLogServiceImpl implements ISysOperLogService {
*/ */
@Async @Async
@EventListener @EventListener
@Override
public void recordOper(OperLogEvent operLogEvent) { public void recordOper(OperLogEvent operLogEvent) {
SysOperLogBo operLog = MapstructUtils.convert(operLogEvent, SysOperLogBo.class); SysOperLogBo operLog = MapstructUtils.convert(operLogEvent, SysOperLogBo.class);
// 远程查询操作地点 // 远程查询操作地点

View File

@@ -7,6 +7,7 @@ import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.toolkit.Wrappers; import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page; import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import jakarta.servlet.http.HttpServletResponse; import jakarta.servlet.http.HttpServletResponse;
import java.util.Base64;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import org.ruoyi.common.core.constant.CacheNames; import org.ruoyi.common.core.constant.CacheNames;
import org.ruoyi.common.core.exception.ServiceException; import org.ruoyi.common.core.exception.ServiceException;
@@ -29,6 +30,7 @@ import org.ruoyi.system.mapper.SysOssMapper;
import org.ruoyi.system.service.ISysOssService; import org.ruoyi.system.service.ISysOssService;
import org.springframework.cache.annotation.Cacheable; import org.springframework.cache.annotation.Cacheable;
import org.springframework.http.MediaType; import org.springframework.http.MediaType;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile; import org.springframework.web.multipart.MultipartFile;
@@ -87,7 +89,8 @@ public class SysOssServiceImpl implements ISysOssService, OssService {
Map<String, Object> params = bo.getParams(); Map<String, Object> params = bo.getParams();
LambdaQueryWrapper<SysOss> lqw = Wrappers.lambdaQuery(); LambdaQueryWrapper<SysOss> lqw = Wrappers.lambdaQuery();
lqw.like(StringUtils.isNotBlank(bo.getFileName()), SysOss::getFileName, bo.getFileName()); lqw.like(StringUtils.isNotBlank(bo.getFileName()), SysOss::getFileName, bo.getFileName());
lqw.like(StringUtils.isNotBlank(bo.getOriginalName()), SysOss::getOriginalName, bo.getOriginalName()); lqw.like(StringUtils.isNotBlank(bo.getOriginalName()), SysOss::getOriginalName,
bo.getOriginalName());
lqw.eq(StringUtils.isNotBlank(bo.getFileSuffix()), SysOss::getFileSuffix, bo.getFileSuffix()); lqw.eq(StringUtils.isNotBlank(bo.getFileSuffix()), SysOss::getFileSuffix, bo.getFileSuffix());
lqw.eq(StringUtils.isNotBlank(bo.getUrl()), SysOss::getUrl, bo.getUrl()); lqw.eq(StringUtils.isNotBlank(bo.getUrl()), SysOss::getUrl, bo.getUrl());
lqw.between(params.get("beginCreateTime") != null && params.get("endCreateTime") != null, lqw.between(params.get("beginCreateTime") != null && params.get("endCreateTime") != null,
@@ -112,7 +115,7 @@ public class SysOssServiceImpl implements ISysOssService, OssService {
FileUtils.setAttachmentResponseHeader(response, sysOss.getOriginalName()); FileUtils.setAttachmentResponseHeader(response, sysOss.getOriginalName());
response.setContentType(MediaType.APPLICATION_OCTET_STREAM_VALUE + "; charset=UTF-8"); response.setContentType(MediaType.APPLICATION_OCTET_STREAM_VALUE + "; charset=UTF-8");
OssClient storage = OssFactory.instance(); OssClient storage = OssFactory.instance();
try(InputStream inputStream = storage.getObjectContent(sysOss.getUrl())) { try (InputStream inputStream = storage.getObjectContent(sysOss.getUrl())) {
int available = inputStream.available(); int available = inputStream.available();
IoUtil.copy(inputStream, response.getOutputStream(), available); IoUtil.copy(inputStream, response.getOutputStream(), available);
response.setContentLength(available); response.setContentLength(available);
@@ -120,11 +123,50 @@ public class SysOssServiceImpl implements ISysOssService, OssService {
throw new ServiceException(e.getMessage()); throw new ServiceException(e.getMessage());
} }
} }
/**
 * Loads the stored object behind the given OSS record and returns its bytes
 * encoded as a Base64 string.
 *
 * @param ossId primary key of the OSS record
 * @return Base64 representation of the object's content
 * @throws ServiceException if the record does not exist or the object cannot be read
 */
@Override
public String downloadByByte(Long ossId) throws IOException {
    SysOssVo ossRecord = SpringUtils.getAopProxy(this).getById(ossId);
    if (ObjectUtil.isNull(ossRecord)) {
        throw new ServiceException("文件数据不存在!");
    }
    OssClient ossClient = OssFactory.instance();
    try (InputStream objectStream = ossClient.getObjectContent(ossRecord.getUrl())) {
        // Read the whole object, then encode it in one step.
        return Base64.getEncoder().encodeToString(IoUtil.readBytes(objectStream));
    } catch (Exception ex) {
        throw new ServiceException(ex.getMessage());
    }
}
/**
 * Loads the stored object behind the given OSS record and wraps it in a
 * {@link MultipartFile}. The record's file name becomes the part name, its
 * original name the original file name, and the content type is always
 * {@code application/octet-stream}.
 *
 * @param ossId primary key of the OSS record
 * @return the object's content as a multipart file
 * @throws ServiceException if the record does not exist or the object cannot be read
 */
@Override
public MultipartFile downloadByFile(Long ossId) throws IOException {
    SysOssVo ossRecord = SpringUtils.getAopProxy(this).getById(ossId);
    if (ObjectUtil.isNull(ossRecord)) {
        throw new ServiceException("文件数据不存在!");
    }
    OssClient ossClient = OssFactory.instance();
    try (InputStream objectStream = ossClient.getObjectContent(ossRecord.getUrl())) {
        byte[] payload = IoUtil.readBytes(objectStream);
        String partName = ossRecord.getFileName();
        String originalName = ossRecord.getOriginalName();
        return new MockMultipartFile(partName, originalName,
            MediaType.APPLICATION_OCTET_STREAM_VALUE, payload);
    } catch (Exception ex) {
        throw new ServiceException(ex.getMessage());
    }
}
@Override @Override
public SysOssVo upload(MultipartFile file) { public SysOssVo upload(MultipartFile file) {
String originalfileName = file.getOriginalFilename(); String originalfileName = file.getOriginalFilename();
String suffix = StringUtils.substring(originalfileName, originalfileName.lastIndexOf("."), originalfileName.length()); String suffix = StringUtils.substring(originalfileName, originalfileName.lastIndexOf("."),
originalfileName.length());
OssClient storage = OssFactory.instance(); OssClient storage = OssFactory.instance();
UploadResult uploadResult; UploadResult uploadResult;
try { try {

View File

@@ -52,7 +52,7 @@ public class KnowledgeController extends BaseController {
private final IKnowledgeFragmentService fragmentService; private final IKnowledgeFragmentService fragmentService;
private final PdfImageExtractService pdfImageExtractService; // private final PdfImageExtractService pdfImageExtractService;
/** /**
* 根据用户信息查询本地知识库 * 根据用户信息查询本地知识库
@@ -170,11 +170,11 @@ public class KnowledgeController extends BaseController {
* @param file PDF文件 * @param file PDF文件
* @return 文件名称和图片内容 * @return 文件名称和图片内容
*/ */
@PostMapping("/extract-images") // @PostMapping("/extract-images")
@Operation(summary = "提取PDF中的图片并调用大模型,识别图片内容并返回", description = "提取PDF中的图片并调用gpt-4o-mini,识别图片内容并返回") // @Operation(summary = "提取PDF中的图片并调用大模型,识别图片内容并返回", description = "提取PDF中的图片并调用gpt-4o-mini,识别图片内容并返回")
public R<List<PdfFileContentResult>> extractImages( // public R<List<PdfFileContentResult>> extractImages(
@RequestPart("file") MultipartFile file // @RequestPart("file") MultipartFile file
) throws IOException { // ) throws IOException {
return R.ok(pdfImageExtractService.extractImages(file)); // return R.ok(pdfImageExtractService.extractImages(file));
} // }
} }

View File

@@ -0,0 +1,390 @@
package org.ruoyi.chat.service.knowledge;
import cn.hutool.core.util.ObjectUtil;
import cn.hutool.core.util.RandomUtil;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper;
import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import java.util.Collection;
import java.util.Date;
import java.util.List;
import java.util.stream.Collectors;
import lombok.RequiredArgsConstructor;
import org.ruoyi.chain.loader.ResourceLoaderFactory;
import org.ruoyi.constant.DealStatus;
import org.ruoyi.domain.KnowledgeAttach;
import org.ruoyi.domain.KnowledgeAttachPic;
import org.ruoyi.domain.KnowledgeFragment;
import org.ruoyi.domain.KnowledgeInfo;
import org.ruoyi.domain.PdfFileContentResult;
import org.ruoyi.domain.bo.StoreEmbeddingBo;
import org.ruoyi.domain.vo.ChatModelVo;
import org.ruoyi.domain.vo.KnowledgeAttachVo;
import org.ruoyi.domain.vo.KnowledgeInfoVo;
import org.ruoyi.mapper.KnowledgeAttachMapper;
import org.ruoyi.mapper.KnowledgeAttachPicMapper;
import org.ruoyi.mapper.KnowledgeFragmentMapper;
import org.ruoyi.mapper.KnowledgeInfoMapper;
import org.ruoyi.service.IChatModelService;
import org.ruoyi.service.VectorStoreService;
import org.ruoyi.service.impl.PdfImageExtractServiceImpl;
import org.ruoyi.system.domain.vo.SysOssVo;
import org.ruoyi.system.service.ISysOssService;
import org.ruoyi.utils.ZipUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
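/**
 * Asynchronous workers for the knowledge-attachment pipeline: extracting images from an
 * uploaded PDF, analysing each extracted image, and uploading text fragments to the
 * vector store. Progress is tracked through the {@code DealStatus} codes stored on the
 * attachment and picture rows.
 *
 * @Date: 2025/5/15 4:29 PM
 */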
@Service
@RequiredArgsConstructor
public class DealFileService {
private static final Logger log = LoggerFactory.getLogger(DealFileService.class);
private final KnowledgeInfoMapper baseMapper;
private final VectorStoreService vectorStoreService;
private final ResourceLoaderFactory resourceLoaderFactory;
private final KnowledgeFragmentMapper fragmentMapper;
private final KnowledgeAttachMapper attachMapper;
private final IChatModelService chatModelService;
private final ISysOssService ossService;
// private final PdfImageExtractService pdfImageExtractService;
private final KnowledgeAttachPicMapper picMapper;
@Value("${pdf.extract.service.url}")
private String serviceUrl;
@Value("${pdf.extract.ai-api.url}")
private String aiApiUrl;
@Value("${pdf.extract.ai-api.key}")
private String aiApiKey;
@Async
public void dealVectorStatus(KnowledgeAttach attachItem) throws Exception {
try {
//锁定数据 更改VectorStatus 到进行中
if (attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
.set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_20)
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getId, attachItem.getId())
) == 0) {
return;
}
List<KnowledgeFragment> knowledgeFragments = fragmentMapper.selectList(
new LambdaQueryWrapper<KnowledgeFragment>()
.eq(KnowledgeFragment::getKid, attachItem.getKid())
.eq(KnowledgeFragment::getDocId, attachItem.getDocId())
);
if (ObjectUtil.isEmpty(knowledgeFragments)) {
throw new Exception("文件段落为空");
}
List<String> fids = knowledgeFragments.stream()
.map(KnowledgeFragment::getFid)
.collect(Collectors.toList());
if (ObjectUtil.isEmpty(fids)) {
throw new Exception("fids 为空");
}
List<String> chunkList = knowledgeFragments.stream()
.map(KnowledgeFragment::getContent)
.collect(Collectors.toList());
if (ObjectUtil.isEmpty(chunkList)) {
throw new Exception("chunkList 为空");
}
// 通过kid查询知识库信息
KnowledgeInfoVo knowledgeInfoVo = baseMapper.selectVoOne(Wrappers.<KnowledgeInfo>lambdaQuery()
.eq(KnowledgeInfo::getId, attachItem.getKid()));
// 通过向量模型查询模型信息
ChatModelVo chatModelVo = chatModelService.selectModelByName(
knowledgeInfoVo.getEmbeddingModelName());
StoreEmbeddingBo storeEmbeddingBo = new StoreEmbeddingBo();
storeEmbeddingBo.setKid(attachItem.getKid());
storeEmbeddingBo.setDocId(attachItem.getDocId());
storeEmbeddingBo.setFids(fids);
storeEmbeddingBo.setChunkList(chunkList);
storeEmbeddingBo.setVectorModelName(knowledgeInfoVo.getVectorModelName());
storeEmbeddingBo.setEmbeddingModelName(knowledgeInfoVo.getEmbeddingModelName());
storeEmbeddingBo.setApiKey(chatModelVo.getApiKey());
storeEmbeddingBo.setBaseUrl(chatModelVo.getApiHost());
vectorStoreService.storeEmbeddings(storeEmbeddingBo);
//设置处理完成
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
.set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_20)
.eq(KnowledgeAttach::getId, attachItem.getId()));
} catch (Exception e) {
//设置处理失败
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
.set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_40)
.set(KnowledgeAttach::getRemark, attachItem.getRemark() + e.getMessage())
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_20)
.eq(KnowledgeAttach::getId, attachItem.getId()));
throw new RuntimeException(e);
}
}
@Async
public void dealPicStatus(KnowledgeAttach attachItem) throws Exception {
try {
//锁定数据 更改picStatus 到进行中
if (attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
.set(KnowledgeAttach::getPicStatus, DealStatus.STATUS_20)
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getId, attachItem.getId())
) == 0) {
return;
}
//获取附件
if (ObjectUtil.isEmpty(attachItem.getOssId())) {
log.error("==========OssId 为空attachItem={}", attachItem);
throw new Exception("OssId 为空");
}
//获取oss文件
MultipartFile multipartFile = ossService.downloadByFile(attachItem.getOssId());
//拆解出图片ZIP
PdfImageExtractServiceImpl pdfImageExtractService = new PdfImageExtractServiceImpl(serviceUrl,
aiApiUrl, aiApiKey);
byte[] pngs = pdfImageExtractService.extractImages(multipartFile, "png", true);
//解压zip得到图片文件
MultipartFile[] multipartFiles = ZipUtils.unzipToMultipartFiles(pngs);
//上传文件到OSS写入表
for (MultipartFile file : multipartFiles) {
//先查找是否有相同图片名称,先做删除
List<KnowledgeAttachPic> knowledgeAttachPics = picMapper.selectList(
new LambdaQueryWrapper<KnowledgeAttachPic>()
.eq(KnowledgeAttachPic::getKid, attachItem.getKid())
.eq(KnowledgeAttachPic::getAid, attachItem.getId())
.eq(KnowledgeAttachPic::getDocName, file.getOriginalFilename())
);
if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) {
Collection<Long> ossIds = knowledgeAttachPics.stream()
.map(KnowledgeAttachPic::getOssId)
.collect(Collectors.toList());
ossService.deleteWithValidByIds(ossIds, false);
List<Long> collect = knowledgeAttachPics.stream().map(KnowledgeAttachPic::getId)
.collect(Collectors.toList());
picMapper.deleteByIds(collect);
}
SysOssVo upload = ossService.upload(file);
KnowledgeAttachPic entity = new KnowledgeAttachPic();
entity.setKid(attachItem.getKid());
entity.setAid(String.valueOf(attachItem.getId()));
entity.setDocName(file.getOriginalFilename());
entity.setDocType(
file.getOriginalFilename().substring(file.getOriginalFilename().lastIndexOf(".") + 1));
entity.setOssId(upload.getOssId());
int[] ints = extractPageNumbers(file.getOriginalFilename());
if (ObjectUtil.isNotEmpty(ints)) {
assert ints != null;
if (ints.length == 2) {
entity.setPageNum(ints[0]);
entity.setIndexNum(ints[1]);
}
}
picMapper.insert(entity);
}
//设置处理完成
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
.set(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_20)
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getId, attachItem.getId()));
} catch (Exception e) {
//设置处理失败
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
.set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_40)
.set(KnowledgeAttach::getRemark, attachItem.getRemark() + e.getMessage())
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_20)
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getId, attachItem.getId()));
throw new RuntimeException(e);
}
}
@Async
public void dealPicAnysStatus(KnowledgeAttachPic picItem) throws Exception {
try {
//锁定数据 更改 getPicAnysStatus 到进行中
if (picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>()
.set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_20)
.eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttachPic::getId, picItem.getId())
) == 0) {
return;
}
SysOssVo ossVo = ossService.getById(picItem.getOssId());
if (ObjectUtil.isNotEmpty(ossVo)) {
String fileStr = ossService.downloadByByte(picItem.getOssId());
//调用第三方 分析图片内容
PdfImageExtractServiceImpl pdfImageExtractService = new PdfImageExtractServiceImpl(
serviceUrl,
aiApiUrl, aiApiKey);
List<PdfFileContentResult> pdfFileContentResults = pdfImageExtractService.dealFileContent(
new String[]{fileStr});
if (ObjectUtil.isNotEmpty(pdfFileContentResults)) {
for (PdfFileContentResult resultItem : pdfFileContentResults) {
//图片解析内容回写到pic表
picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>()
.set(KnowledgeAttachPic::getContent, parseContent(resultItem.getContent()))
.set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttachPic::getId, picItem.getId()));
//将图片解析内容 写入段落表 fragment
KnowledgeAttachVo knowledgeAttachVo = attachMapper.selectVoById(picItem.getAid());
if (ObjectUtil.isNotEmpty(knowledgeAttachVo)) {
String fid = RandomUtil.randomString(10);
KnowledgeFragment knowledgeFragment = new KnowledgeFragment();
knowledgeFragment.setKid(knowledgeAttachVo.getKid());
knowledgeFragment.setDocId(knowledgeAttachVo.getDocId());
knowledgeFragment.setFid(fid);
knowledgeFragment.setIdx(0);
knowledgeFragment.setContent(parseContent(resultItem.getContent()));
knowledgeFragment.setCreateTime(new Date());
fragmentMapper.insert(knowledgeFragment);
//更新attach表需要所有图片都处理完毕
// 查询非30状态完成状态的记录数量
long nonStatus30Count = picMapper.selectCount(
new LambdaQueryWrapper<KnowledgeAttachPic>()
.ne(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttachPic::getAid, picItem.getAid())
);
if (nonStatus30Count == 0) {
// 执行表更新操作
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
.set(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getId, picItem.getAid()));
}
}
}
}
}
} catch (Exception e) {
//失败
picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>()
.set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_40)
.set(KnowledgeAttachPic::getRemark, picItem.getRemark() + e.getMessage())
.eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_20)
.eq(KnowledgeAttachPic::getId, picItem.getId()));
throw new RuntimeException(e);
} finally {
//将图片分析失败的数据 重新设置状态
picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>()
.set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_40));
}
}
/**
* 从文件名中提取page后面的两个数字
*
* @param fileName 文件名
* @return 包含两个数字的数组如果未找到则返回null
*/
public static int[] extractPageNumbers(String fileName) {
// 查找"page_"的位置
int pageIndex = fileName.indexOf("page_");
if (pageIndex == -1) {
return null;
}
// 从"page_"后开始截取
String afterPage = fileName.substring(pageIndex + 5);
// 按下划线分割
String[] parts = afterPage.split("_");
if (parts.length >= 2) {
try {
// 提取两个数字
int firstNumber = Integer.parseInt(parts[0]);
// 对于第二个数字,需要去掉可能的文件扩展名
String secondPart = parts[1];
int dotIndex = secondPart.indexOf(".");
if (dotIndex != -1) {
secondPart = secondPart.substring(0, dotIndex);
}
int secondNumber = Integer.parseInt(secondPart);
return new int[]{firstNumber, secondNumber};
} catch (NumberFormatException e) {
return null;
}
}
return null;
}
public static String parseContent(String jsonString) {
try {
// 创建ObjectMapper实例
ObjectMapper objectMapper = new ObjectMapper();
// 解析JSON字符串
JsonNode rootNode = objectMapper.readTree(jsonString);
// 获取choices数组的第一个元素
JsonNode choicesNode = rootNode.get("choices");
if (choicesNode != null && choicesNode.isArray() && choicesNode.size() > 0) {
// 获取第一个choice
JsonNode firstChoice = choicesNode.get(0);
// 获取message节点
JsonNode messageNode = firstChoice.get("message");
if (messageNode != null) {
// 获取content字段的值
JsonNode contentNode = messageNode.get("content");
if (contentNode != null) {
return contentNode.asText();
}
}
}
return "无法找到content内容";
} catch (Exception e) {
e.printStackTrace();
return "解析JSON时发生错误: " + e.getMessage();
}
}
}

View File

@@ -1,44 +1,63 @@
package org.ruoyi.chat.service.knowledge; package org.ruoyi.chat.service.knowledge;
import cn.hutool.core.collection.CollUtil; import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.util.ObjectUtil;
import cn.hutool.core.util.RandomUtil; import cn.hutool.core.util.RandomUtil;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper; import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper;
import com.baomidou.mybatisplus.core.toolkit.Wrappers; import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page; import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import java.util.stream.Collectors;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.ruoyi.chain.loader.ResourceLoader; import org.ruoyi.chain.loader.ResourceLoader;
import org.ruoyi.chain.loader.ResourceLoaderFactory; import org.ruoyi.chain.loader.ResourceLoaderFactory;
import org.ruoyi.common.core.domain.model.LoginUser; import org.ruoyi.common.core.domain.model.LoginUser;
import org.ruoyi.common.core.utils.MapstructUtils; import org.ruoyi.common.core.utils.MapstructUtils;
import org.ruoyi.common.core.utils.StringUtils; import org.ruoyi.common.core.utils.StringUtils;
import org.ruoyi.common.satoken.utils.LoginHelper; import org.ruoyi.common.satoken.utils.LoginHelper;
import org.ruoyi.constant.DealStatus;
import org.ruoyi.constant.FileType;
import org.ruoyi.core.page.PageQuery; import org.ruoyi.core.page.PageQuery;
import org.ruoyi.core.page.TableDataInfo; import org.ruoyi.core.page.TableDataInfo;
import org.ruoyi.domain.ChatModel; import org.ruoyi.domain.ChatModel;
import org.ruoyi.domain.KnowledgeAttach; import org.ruoyi.domain.KnowledgeAttach;
import org.ruoyi.domain.KnowledgeAttachPic;
import org.ruoyi.domain.KnowledgeFragment; import org.ruoyi.domain.KnowledgeFragment;
import org.ruoyi.domain.KnowledgeInfo; import org.ruoyi.domain.KnowledgeInfo;
import org.ruoyi.domain.PdfFileContentResult;
import org.ruoyi.domain.bo.KnowledgeInfoBo; import org.ruoyi.domain.bo.KnowledgeInfoBo;
import org.ruoyi.domain.bo.KnowledgeInfoUploadBo; import org.ruoyi.domain.bo.KnowledgeInfoUploadBo;
import org.ruoyi.domain.bo.StoreEmbeddingBo; import org.ruoyi.domain.bo.StoreEmbeddingBo;
import org.ruoyi.domain.vo.ChatModelVo; import org.ruoyi.domain.vo.ChatModelVo;
import org.ruoyi.domain.vo.KnowledgeAttachVo;
import org.ruoyi.domain.vo.KnowledgeInfoVo; import org.ruoyi.domain.vo.KnowledgeInfoVo;
import org.ruoyi.mapper.KnowledgeAttachMapper; import org.ruoyi.mapper.KnowledgeAttachMapper;
import org.ruoyi.mapper.KnowledgeAttachPicMapper;
import org.ruoyi.mapper.KnowledgeFragmentMapper; import org.ruoyi.mapper.KnowledgeFragmentMapper;
import org.ruoyi.mapper.KnowledgeInfoMapper; import org.ruoyi.mapper.KnowledgeInfoMapper;
import org.ruoyi.service.IChatModelService; import org.ruoyi.service.IChatModelService;
import org.ruoyi.service.PdfImageExtractService;
import org.ruoyi.service.VectorStoreService; import org.ruoyi.service.VectorStoreService;
import org.ruoyi.service.IKnowledgeInfoService; import org.ruoyi.service.IKnowledgeInfoService;
import org.ruoyi.service.impl.PdfImageExtractServiceImpl;
import org.ruoyi.system.domain.vo.SysOssVo;
import org.ruoyi.utils.ZipUtils;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.annotation.Async;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional; import org.springframework.transaction.annotation.Transactional;
import org.springframework.web.multipart.MultipartFile; import org.springframework.web.multipart.MultipartFile;
import org.ruoyi.system.service.ISysOssService;
import java.io.IOException; import java.io.IOException;
import java.util.*; import java.util.*;
/** /**
* 知识库Service业务层处理 * 知识库Service业务层处理
* *
@@ -62,11 +81,26 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
private final IChatModelService chatModelService; private final IChatModelService chatModelService;
private final ISysOssService ossService;
// private final PdfImageExtractService pdfImageExtractService;
private final KnowledgeAttachPicMapper picMapper;
private final DealFileService dealFileService;
@Value("${pdf.extract.service.url}")
private String serviceUrl;
@Value("${pdf.extract.ai-api.url}")
private String aiApiUrl;
@Value("${pdf.extract.ai-api.key}")
private String aiApiKey;
/** /**
* 查询知识库 * 查询知识库
*/ */
@Override @Override
public KnowledgeInfoVo queryById(Long id){ public KnowledgeInfoVo queryById(Long id) {
return baseMapper.selectVoById(id); return baseMapper.selectVoById(id);
} }
@@ -96,9 +130,12 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
lqw.eq(bo.getUid() != null, KnowledgeInfo::getUid, bo.getUid()); lqw.eq(bo.getUid() != null, KnowledgeInfo::getUid, bo.getUid());
lqw.like(StringUtils.isNotBlank(bo.getKname()), KnowledgeInfo::getKname, bo.getKname()); lqw.like(StringUtils.isNotBlank(bo.getKname()), KnowledgeInfo::getKname, bo.getKname());
lqw.eq(bo.getShare() != null, KnowledgeInfo::getShare, bo.getShare()); lqw.eq(bo.getShare() != null, KnowledgeInfo::getShare, bo.getShare());
lqw.eq(StringUtils.isNotBlank(bo.getDescription()), KnowledgeInfo::getDescription, bo.getDescription()); lqw.eq(StringUtils.isNotBlank(bo.getDescription()), KnowledgeInfo::getDescription,
lqw.eq(StringUtils.isNotBlank(bo.getKnowledgeSeparator()), KnowledgeInfo::getKnowledgeSeparator, bo.getKnowledgeSeparator()); bo.getDescription());
lqw.eq(StringUtils.isNotBlank(bo.getQuestionSeparator()), KnowledgeInfo::getQuestionSeparator, bo.getQuestionSeparator()); lqw.eq(StringUtils.isNotBlank(bo.getKnowledgeSeparator()), KnowledgeInfo::getKnowledgeSeparator,
bo.getKnowledgeSeparator());
lqw.eq(StringUtils.isNotBlank(bo.getQuestionSeparator()), KnowledgeInfo::getQuestionSeparator,
bo.getQuestionSeparator());
lqw.eq(bo.getOverlapChar() != null, KnowledgeInfo::getOverlapChar, bo.getOverlapChar()); lqw.eq(bo.getOverlapChar() != null, KnowledgeInfo::getOverlapChar, bo.getOverlapChar());
lqw.eq(bo.getRetrieveLimit() != null, KnowledgeInfo::getRetrieveLimit, bo.getRetrieveLimit()); lqw.eq(bo.getRetrieveLimit() != null, KnowledgeInfo::getRetrieveLimit, bo.getRetrieveLimit());
lqw.eq(bo.getTextBlockSize() != null, KnowledgeInfo::getTextBlockSize, bo.getTextBlockSize()); lqw.eq(bo.getTextBlockSize() != null, KnowledgeInfo::getTextBlockSize, bo.getTextBlockSize());
@@ -132,7 +169,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
/** /**
* 保存前的数据校验 * 保存前的数据校验
*/ */
private void validEntityBeforeSave(KnowledgeInfo entity){ private void validEntityBeforeSave(KnowledgeInfo entity) {
//TODO 做一些数据校验,如唯一约束 //TODO 做一些数据校验,如唯一约束
} }
@@ -141,7 +178,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
*/ */
@Override @Override
public Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid) { public Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid) {
if(isValid){ if (isValid) {
//TODO 做一些业务上的校验,判断是否需要校验 //TODO 做一些业务上的校验,判断是否需要校验
} }
return baseMapper.deleteBatchIds(ids) > 0; return baseMapper.deleteBatchIds(ids) > 0;
@@ -151,7 +188,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
@Transactional(rollbackFor = Exception.class) @Transactional(rollbackFor = Exception.class)
public void saveOne(KnowledgeInfoBo bo) { public void saveOne(KnowledgeInfoBo bo) {
KnowledgeInfo knowledgeInfo = MapstructUtils.convert(bo, KnowledgeInfo.class); KnowledgeInfo knowledgeInfo = MapstructUtils.convert(bo, KnowledgeInfo.class);
if (StringUtils.isBlank(bo.getKid())){ if (StringUtils.isBlank(bo.getKid())) {
String kid = RandomUtil.randomString(10); String kid = RandomUtil.randomString(10);
if (knowledgeInfo != null) { if (knowledgeInfo != null) {
knowledgeInfo.setKid(kid); knowledgeInfo.setKid(kid);
@@ -159,9 +196,10 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
} }
baseMapper.insert(knowledgeInfo); baseMapper.insert(knowledgeInfo);
if (knowledgeInfo != null) { if (knowledgeInfo != null) {
vectorStoreService.createSchema(String.valueOf(knowledgeInfo.getId()),bo.getVectorModelName()); vectorStoreService.createSchema(String.valueOf(knowledgeInfo.getId()),
bo.getVectorModelName());
} }
}else { } else {
baseMapper.updateById(knowledgeInfo); baseMapper.updateById(knowledgeInfo);
} }
} }
@@ -169,16 +207,45 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
@Override @Override
@Transactional(rollbackFor = Exception.class) @Transactional(rollbackFor = Exception.class)
public void removeKnowledge(String id) { public void removeKnowledge(String id) {
Map<String,Object> map = new HashMap<>(); Map<String, Object> map = new HashMap<>();
map.put("kid",id); map.put("kid", id);
List<KnowledgeInfoVo> knowledgeInfoList = baseMapper.selectVoByMap(map); List<KnowledgeInfoVo> knowledgeInfoList = baseMapper.selectVoByMap(map);
check(knowledgeInfoList); check(knowledgeInfoList);
// 删除向量库信息 // 删除向量库信息
knowledgeInfoList.forEach(knowledgeInfoVo -> { knowledgeInfoList.forEach(knowledgeInfoVo -> {
vectorStoreService.removeByKid(String.valueOf(knowledgeInfoVo.getId()),knowledgeInfoVo.getVectorModelName()); vectorStoreService.removeByKid(String.valueOf(knowledgeInfoVo.getId()),
knowledgeInfoVo.getVectorModelName());
}); });
// 删除附件和知识片段 // 删除附件和知识片段
fragmentMapper.deleteByMap(map); fragmentMapper.deleteByMap(map);
List<KnowledgeAttachVo> knowledgeAttachVos = attachMapper.selectVoByMap(map);
if (ObjectUtil.isNotEmpty(knowledgeAttachVos)) {
Collection<Long> ossIds = knowledgeAttachVos.stream()
.map(KnowledgeAttachVo::getOssId)
.collect(Collectors.toList());
//删除oss
ossService.deleteWithValidByIds(ossIds, false);
//删除图片oss
List<KnowledgeAttachPic> knowledgeAttachPics = picMapper.selectList(
new LambdaQueryWrapper<KnowledgeAttachPic>()
.in(KnowledgeAttachPic::getKid,
knowledgeAttachVos.stream().map(KnowledgeAttachVo::getKid)
.collect(Collectors.toList()))
.in(KnowledgeAttachPic::getAid,
knowledgeAttachVos.stream().map(KnowledgeAttachVo::getId)
.collect(Collectors.toList()))
);
if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) {
Collection<Long> tossIds = knowledgeAttachPics.stream()
.map(KnowledgeAttachPic::getOssId)
.collect(Collectors.toList());
ossService.deleteWithValidByIds(tossIds, false);
List<Long> collect = knowledgeAttachPics.stream().map(KnowledgeAttachPic::getId)
.collect(Collectors.toList());
picMapper.deleteByIds(collect);
}
}
attachMapper.deleteByMap(map); attachMapper.deleteByMap(map);
// 删除知识库 // 删除知识库
baseMapper.deleteByMap(map); baseMapper.deleteByMap(map);
@@ -190,6 +257,11 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
} }
public void storeContent(MultipartFile file, String kid) { public void storeContent(MultipartFile file, String kid) {
if (file == null || file.isEmpty()) {
throw new IllegalArgumentException("File cannot be null or empty");
}
SysOssVo uploadDto = null;
String fileName = file.getOriginalFilename(); String fileName = file.getOriginalFilename();
List<String> chunkList = new ArrayList<>(); List<String> chunkList = new ArrayList<>();
KnowledgeAttach knowledgeAttach = new KnowledgeAttach(); KnowledgeAttach knowledgeAttach = new KnowledgeAttach();
@@ -197,15 +269,18 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
String docId = RandomUtil.randomString(10); String docId = RandomUtil.randomString(10);
knowledgeAttach.setDocId(docId); knowledgeAttach.setDocId(docId);
knowledgeAttach.setDocName(fileName); knowledgeAttach.setDocName(fileName);
knowledgeAttach.setDocType(fileName.substring(fileName.lastIndexOf(".")+1)); knowledgeAttach.setDocType(fileName.substring(fileName.lastIndexOf(".") + 1));
String content = ""; String content = "";
ResourceLoader resourceLoader = resourceLoaderFactory.getLoaderByFileType(knowledgeAttach.getDocType()); ResourceLoader resourceLoader = resourceLoaderFactory.getLoaderByFileType(
knowledgeAttach.getDocType());
List<String> fids = new ArrayList<>(); List<String> fids = new ArrayList<>();
try { try {
content = resourceLoader.getContent(file.getInputStream()); content = resourceLoader.getContent(file.getInputStream());
chunkList = resourceLoader.getChunkList(content, kid); chunkList = resourceLoader.getChunkList(content, kid);
List<KnowledgeFragment> knowledgeFragmentList = new ArrayList<>(); List<KnowledgeFragment> knowledgeFragmentList = new ArrayList<>();
if (CollUtil.isNotEmpty(chunkList)) { if (CollUtil.isNotEmpty(chunkList)) {
// Upload file to OSS
uploadDto = ossService.upload(file);
for (int i = 0; i < chunkList.size(); i++) { for (int i = 0; i < chunkList.size(); i++) {
String fid = RandomUtil.randomString(10); String fid = RandomUtil.randomString(10);
fids.add(fid); fids.add(fid);
@@ -225,25 +300,21 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
} }
knowledgeAttach.setContent(content); knowledgeAttach.setContent(content);
knowledgeAttach.setCreateTime(new Date()); knowledgeAttach.setCreateTime(new Date());
if (ObjectUtil.isNotEmpty(uploadDto) && ObjectUtil.isNotEmpty(uploadDto.getOssId())) {
knowledgeAttach.setOssId(uploadDto.getOssId());
//只有pdf文件 才需要拆解图片和分析图片内容
if (FileType.PDF.equals(knowledgeAttach.getDocType())) {
knowledgeAttach.setPicStatus(DealStatus.STATUS_10);
knowledgeAttach.setPicAnysStatus(DealStatus.STATUS_10);
} else {
knowledgeAttach.setPicStatus(DealStatus.STATUS_30);
knowledgeAttach.setPicAnysStatus(DealStatus.STATUS_30);
}
//所有文件上传后,都需要同步到向量数据库
knowledgeAttach.setVectorStatus(DealStatus.STATUS_10);
}
attachMapper.insert(knowledgeAttach); attachMapper.insert(knowledgeAttach);
// 通过kid查询知识库信息
KnowledgeInfoVo knowledgeInfoVo = baseMapper.selectVoOne(Wrappers.<KnowledgeInfo>lambdaQuery()
.eq(KnowledgeInfo::getId, kid));
// 通过向量模型查询模型信息
ChatModelVo chatModelVo = chatModelService.selectModelByName(knowledgeInfoVo.getEmbeddingModelName());
StoreEmbeddingBo storeEmbeddingBo = new StoreEmbeddingBo();
storeEmbeddingBo.setKid(kid);
storeEmbeddingBo.setDocId(docId);
storeEmbeddingBo.setFids(fids);
storeEmbeddingBo.setChunkList(chunkList);
storeEmbeddingBo.setVectorModelName(knowledgeInfoVo.getVectorModelName());
storeEmbeddingBo.setEmbeddingModelName(knowledgeInfoVo.getEmbeddingModelName());
storeEmbeddingBo.setApiKey(chatModelVo.getApiKey());
storeEmbeddingBo.setBaseUrl(chatModelVo.getApiHost());
vectorStoreService.storeEmbeddings(storeEmbeddingBo);
} }
@@ -252,13 +323,75 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
* *
* @param knowledgeInfoList 知识库列表 * @param knowledgeInfoList 知识库列表
*/ */
public void check(List<KnowledgeInfoVo> knowledgeInfoList){ public void check(List<KnowledgeInfoVo> knowledgeInfoList) {
LoginUser loginUser = LoginHelper.getLoginUser(); LoginUser loginUser = LoginHelper.getLoginUser();
for (KnowledgeInfoVo knowledgeInfoVo : knowledgeInfoList) { for (KnowledgeInfoVo knowledgeInfoVo : knowledgeInfoList) {
if(!knowledgeInfoVo.getUid().equals(loginUser.getUserId())){ if (!knowledgeInfoVo.getUid().equals(loginUser.getUserId())) {
throw new SecurityException("权限不足"); throw new SecurityException("权限不足");
} }
} }
} }
/**
* 第一步 定时 拆解PDF文件中的图片
*/
@Scheduled(fixedDelay = 15000) // 每3秒执行一次
public void dealKnowledgeAttachPic() throws Exception {
//处理 拆解PDF文件中的图片的记录
List<KnowledgeAttach> knowledgeAttaches = attachMapper.selectList(
new LambdaQueryWrapper<KnowledgeAttach>()
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
);
log.info("===============拆解PDF文件中的图片 size = {}", knowledgeAttaches.size());
if (ObjectUtil.isNotEmpty(knowledgeAttaches)) {
for (KnowledgeAttach attachItem : knowledgeAttaches) {
dealFileService.dealPicStatus(attachItem);
}
}
}
/**
* 第二步 定时 解析图片内容
*/
@Scheduled(fixedDelay = 15000)
public void dealKnowledgeAttachPicAnys() throws Exception {
//获取未处理的图片记录
List<KnowledgeAttachPic> knowledgeAttachPics = picMapper.selectList(
new LambdaQueryWrapper<KnowledgeAttachPic>()
.eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10)
);
if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) {
for (KnowledgeAttachPic picItem : knowledgeAttachPics) {
dealFileService.dealPicAnysStatus(picItem);
}
}
}
/**
* 第三步 定时 处理 附件上传后上传向量数据库
*/
@Scheduled(fixedDelay = 30000) // 每3秒执行一次
public void dealKnowledgeAttachVector() throws Exception {
//处理 需要上传向量数据库的记录
List<KnowledgeAttach> knowledgeAttaches = attachMapper.selectList(
new LambdaQueryWrapper<KnowledgeAttach>()
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
);
log.info("===============上传向量数据库 size = {}", knowledgeAttaches.size());
if (ObjectUtil.isNotEmpty(knowledgeAttaches)) {
for (KnowledgeAttach attachItem : knowledgeAttaches) {
dealFileService.dealVectorStatus(attachItem);
}
}
}
} }

View File

@@ -1,6 +1,10 @@
ALTER TABLE `knowledge_attach` ALTER TABLE `knowledge_attach`
ADD COLUMN `oss_id` bigint(20) NOT NULL COMMENT '对象存储主键' AFTER `remark`,
ADD COLUMN `pic_status` tinyint(1) NOT NULL DEFAULT 10 COMMENT '拆解图片状态10未开始20进行中30已完成' AFTER `oss_id`, ADD COLUMN `pic_status` tinyint(1) NOT NULL DEFAULT 10 COMMENT '拆解图片状态10未开始20进行中30已完成' AFTER `oss_id`,
ADD COLUMN `pic_anys_status` tinyint(1) NOT NULL DEFAULT 10 COMMENT '分析图片状态10未开始20进行中30已完成' AFTER `pic_status`, ADD COLUMN `pic_anys_status` tinyint(1) NOT NULL DEFAULT 10 COMMENT '分析图片状态10未开始20进行中30已完成' AFTER `pic_status`,
ADD COLUMN `vector_status` tinyint(1) NOT NULL DEFAULT 10 COMMENT '写入向量数据库状态10未开始20进行中30已完成' AFTER `pic_anys_status`, ADD COLUMN `vector_status` tinyint(1) NOT NULL DEFAULT 10 COMMENT '写入向量数据库状态10未开始20进行中30已完成' AFTER `pic_anys_status`,
DROP PRIMARY KEY, DROP PRIMARY KEY,
ADD PRIMARY KEY (`id`) USING BTREE; ADD PRIMARY KEY (`id`) USING BTREE;
ALTER TABLE `knowledge_attach`
MODIFY COLUMN `remark` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL COMMENT '备注' AFTER `update_time`;