feat:pdf文件解析图片和分析图片,上传向量数据库都修改成 成异步处理

This commit is contained in:
zhouweiyi
2025-05-15 17:46:38 +08:00
parent 584212c569
commit c6ffbcb3cf
24 changed files with 1585 additions and 414 deletions

View File

@@ -3,6 +3,7 @@ package org.ruoyi;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.boot.context.metrics.buffering.BufferingApplicationStartup;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.annotation.EnableScheduling;
/**
@@ -12,6 +13,7 @@ import org.springframework.scheduling.annotation.EnableScheduling;
*/
@SpringBootApplication
@EnableScheduling
@EnableAsync
public class RuoYiAIApplication {
public static void main(String[] args) {

View File

@@ -114,6 +114,10 @@
<artifactId>commons-io</artifactId>
<version>2.17.0</version>
</dependency>
<dependency>
<groupId>org.ruoyi</groupId>
<artifactId>ruoyi-system-api</artifactId>
</dependency>
</dependencies>

View File

@@ -11,6 +11,8 @@ public class DealStatus {
public static final Integer STATUS_20 = 20;
//已结束
public static final Integer STATUS_30 = 30;
//处理失败
public static final Integer STATUS_40 = 40;
}

View File

@@ -0,0 +1,81 @@
package org.ruoyi.domain;
import com.baomidou.mybatisplus.annotation.*;
import lombok.Data;
import lombok.EqualsAndHashCode;
import java.io.Serial;
import org.ruoyi.core.domain.BaseEntity;
/**
* 知识库附件图片列对象 knowledge_attach_pic
*
* @author Albert
* @date 2025-05-15
*/
@Data
@EqualsAndHashCode(callSuper = true)
@TableName("knowledge_attach_pic")
public class KnowledgeAttachPic extends BaseEntity {
@Serial
private static final long serialVersionUID = 1L;
/**
* 主键
*/
@TableId(value = "id")
private Long id;
/**
* 知识库id
*/
private String kid;
/**
* 附件id
*/
private String aid;
/**
* 文档名称
*/
private String docName;
/**
* 文档类型
*/
private String docType;
/**
* 文档内容
*/
private String content;
/**
* 所在页数
*/
private Integer pageNum;
/**
* 所在页index
*/
private Integer indexNum;
/**
* 分析图片状态10未开始20进行中30已完成
*/
private Integer picAnysStatus;
/**
* 对象存储主键
*/
private Long ossId;
/**
* 备注
*/
private String remark;
}

View File

@@ -0,0 +1,90 @@
package org.ruoyi.domain.bo;
import org.ruoyi.common.core.validate.AddGroup;
import org.ruoyi.common.core.validate.EditGroup;
import org.ruoyi.core.domain.BaseEntity;
import io.github.linpeilie.annotations.AutoMapper;
import lombok.Data;
import lombok.EqualsAndHashCode;
import jakarta.validation.constraints.*;
import org.ruoyi.domain.KnowledgeAttachPic;
/**
* 知识库附件图片列业务对象 knowledge_attach_pic
*
* @author Albert
* @date 2025-05-15
*/
@Data
@EqualsAndHashCode(callSuper = true)
@AutoMapper(target = KnowledgeAttachPic.class, reverseConvertGenerate = false)
public class KnowledgeAttachPicBo extends BaseEntity {
/**
* 主键
*/
@NotNull(message = "主键不能为空", groups = {EditGroup.class})
private Long id;
/**
* 知识库id
*/
@NotBlank(message = "知识库id不能为空", groups = {AddGroup.class, EditGroup.class})
private String kid;
/**
* 附件id
*/
@NotBlank(message = "附件id不能为空", groups = {AddGroup.class, EditGroup.class})
private String aid;
/**
* 文档名称
*/
@NotBlank(message = "文档名称不能为空", groups = {AddGroup.class, EditGroup.class})
private String docName;
/**
* 文档类型
*/
@NotBlank(message = "文档类型不能为空", groups = {AddGroup.class, EditGroup.class})
private String docType;
/**
* 文档内容
*/
@NotBlank(message = "文档内容不能为空", groups = {AddGroup.class, EditGroup.class})
private String content;
/**
* 所在页数
*/
@NotNull(message = "所在页数不能为空", groups = {AddGroup.class, EditGroup.class})
private Integer pageNum;
/**
* 所在页index
*/
@NotNull(message = "所在页index不能为空", groups = {AddGroup.class, EditGroup.class})
private Integer indexNum;
/**
* 分析图片状态10未开始20进行中30已完成
*/
@NotNull(message = "分析图片状态10未开始20进行中30已完成不能为空", groups = {AddGroup.class,
EditGroup.class})
private Integer picAnysStatus;
/**
* 对象存储主键
*/
@NotNull(message = "对象存储主键不能为空", groups = {AddGroup.class, EditGroup.class})
private Long ossId;
/**
* 备注
*/
@NotBlank(message = "备注不能为空", groups = {AddGroup.class, EditGroup.class})
private String remark;
}

View File

@@ -0,0 +1,92 @@
package org.ruoyi.domain.vo;
import com.alibaba.excel.annotation.ExcelIgnoreUnannotated;
import com.alibaba.excel.annotation.ExcelProperty;
import io.github.linpeilie.annotations.AutoMapper;
import lombok.Data;
import java.io.Serial;
import java.io.Serializable;
import org.ruoyi.domain.KnowledgeAttachPic;
/**
* 知识库附件图片列视图对象 knowledge_attach_pic
*
* @author Albert
* @date 2025-05-15
*/
@Data
@ExcelIgnoreUnannotated
@AutoMapper(target = KnowledgeAttachPic.class)
public class KnowledgeAttachPicVo implements Serializable {
@Serial
private static final long serialVersionUID = 1L;
/**
* 主键
*/
@ExcelProperty(value = "主键")
private Long id;
/**
* 知识库id
*/
@ExcelProperty(value = "知识库id")
private String kid;
/**
* 附件id
*/
@ExcelProperty(value = "附件id")
private String aid;
/**
* 文档名称
*/
@ExcelProperty(value = "文档名称")
private String docName;
/**
* 文档类型
*/
@ExcelProperty(value = "文档类型")
private String docType;
/**
* 文档内容
*/
@ExcelProperty(value = "文档内容")
private String content;
/**
* 所在页数
*/
@ExcelProperty(value = "所在页数")
private Integer pageNum;
/**
* 所在页index
*/
@ExcelProperty(value = "所在页index")
private Integer indexNum;
/**
* 分析图片状态10未开始20进行中30已完成
*/
@ExcelProperty(value = "分析图片状态10未开始20进行中30已完成")
private Integer picAnysStatus;
/**
* 对象存储主键
*/
@ExcelProperty(value = "对象存储主键")
private Long ossId;
/**
* 备注
*/
@ExcelProperty(value = "备注")
private String remark;
}

View File

@@ -0,0 +1,15 @@
package org.ruoyi.mapper;
import org.ruoyi.core.mapper.BaseMapperPlus;
import org.ruoyi.domain.KnowledgeAttachPic;
import org.ruoyi.domain.vo.KnowledgeAttachPicVo;
/**
* 知识库附件图片列Mapper接口
*
* @author Albert
* @date 2025-05-15
*/
public interface KnowledgeAttachPicMapper extends BaseMapperPlus<KnowledgeAttachPic, KnowledgeAttachPicVo> {
}

View File

@@ -0,0 +1,47 @@
package org.ruoyi.service;
import java.util.Collection;
import java.util.List;
import org.ruoyi.core.page.PageQuery;
import org.ruoyi.core.page.TableDataInfo;
import org.ruoyi.domain.bo.KnowledgeAttachPicBo;
import org.ruoyi.domain.vo.KnowledgeAttachPicVo;
/**
* 知识库附件图片列Service接口
*
* @author Albert
* @date 2025-05-15
*/
public interface IKnowledgeAttachPicService {
/**
* 查询知识库附件图片列
*/
KnowledgeAttachPicVo queryById(Long id);
/**
* 查询知识库附件图片列列表
*/
TableDataInfo<KnowledgeAttachPicVo> queryPageList(KnowledgeAttachPicBo bo, PageQuery pageQuery);
/**
* 查询知识库附件图片列列表
*/
List<KnowledgeAttachPicVo> queryList(KnowledgeAttachPicBo bo);
/**
* 新增知识库附件图片列
*/
Boolean insertByBo(KnowledgeAttachPicBo bo);
/**
* 修改知识库附件图片列
*/
Boolean updateByBo(KnowledgeAttachPicBo bo);
/**
* 校验并批量删除知识库附件图片列信息
*/
Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid);
}

View File

@@ -0,0 +1,123 @@
package org.ruoyi.service.impl;
import org.ruoyi.common.core.utils.MapstructUtils;
import org.ruoyi.common.core.utils.StringUtils;
import org.ruoyi.core.page.TableDataInfo;
import org.ruoyi.core.page.PageQuery;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import lombok.RequiredArgsConstructor;
import org.ruoyi.domain.KnowledgeAttachPic;
import org.ruoyi.domain.bo.KnowledgeAttachPicBo;
import org.ruoyi.domain.vo.KnowledgeAttachPicVo;
import org.ruoyi.mapper.KnowledgeAttachPicMapper;
import org.ruoyi.service.IKnowledgeAttachPicService;
import org.springframework.stereotype.Service;
import java.util.List;
import java.util.Map;
import java.util.Collection;
/**
* 知识库附件图片列Service业务层处理
*
* @author ageerle
* @date 2025-05-15
*/
@RequiredArgsConstructor
@Service
public class KnowledgeAttachPicServiceImpl implements IKnowledgeAttachPicService {
private final KnowledgeAttachPicMapper baseMapper;
/**
* 查询知识库附件图片列
*/
@Override
public KnowledgeAttachPicVo queryById(Long id) {
return baseMapper.selectVoById(id);
}
/**
* 查询知识库附件图片列列表
*/
@Override
public TableDataInfo<KnowledgeAttachPicVo> queryPageList(KnowledgeAttachPicBo bo,
PageQuery pageQuery) {
LambdaQueryWrapper<KnowledgeAttachPic> lqw = buildQueryWrapper(bo);
Page<KnowledgeAttachPicVo> result = baseMapper.selectVoPage(pageQuery.build(), lqw);
return TableDataInfo.build(result);
}
/**
* 查询知识库附件图片列列表
*/
@Override
public List<KnowledgeAttachPicVo> queryList(KnowledgeAttachPicBo bo) {
LambdaQueryWrapper<KnowledgeAttachPic> lqw = buildQueryWrapper(bo);
return baseMapper.selectVoList(lqw);
}
private LambdaQueryWrapper<KnowledgeAttachPic> buildQueryWrapper(KnowledgeAttachPicBo bo) {
Map<String, Object> params = bo.getParams();
LambdaQueryWrapper<KnowledgeAttachPic> lqw = Wrappers.lambdaQuery();
lqw.eq(StringUtils.isNotBlank(bo.getKid()), KnowledgeAttachPic::getKid, bo.getKid());
lqw.eq(StringUtils.isNotBlank(bo.getAid()), KnowledgeAttachPic::getAid, bo.getAid());
lqw.like(StringUtils.isNotBlank(bo.getDocName()), KnowledgeAttachPic::getDocName,
bo.getDocName());
lqw.eq(StringUtils.isNotBlank(bo.getDocType()), KnowledgeAttachPic::getDocType,
bo.getDocType());
lqw.eq(StringUtils.isNotBlank(bo.getContent()), KnowledgeAttachPic::getContent,
bo.getContent());
lqw.eq(bo.getPageNum() != null, KnowledgeAttachPic::getPageNum, bo.getPageNum());
lqw.eq(bo.getIndexNum() != null, KnowledgeAttachPic::getIndexNum, bo.getIndexNum());
lqw.eq(bo.getPicAnysStatus() != null, KnowledgeAttachPic::getPicAnysStatus,
bo.getPicAnysStatus());
lqw.eq(bo.getOssId() != null, KnowledgeAttachPic::getOssId, bo.getOssId());
return lqw;
}
/**
* 新增知识库附件图片列
*/
@Override
public Boolean insertByBo(KnowledgeAttachPicBo bo) {
KnowledgeAttachPic add = MapstructUtils.convert(bo, KnowledgeAttachPic.class);
validEntityBeforeSave(add);
boolean flag = baseMapper.insert(add) > 0;
if (flag) {
bo.setId(add.getId());
}
return flag;
}
/**
* 修改知识库附件图片列
*/
@Override
public Boolean updateByBo(KnowledgeAttachPicBo bo) {
KnowledgeAttachPic update = MapstructUtils.convert(bo, KnowledgeAttachPic.class);
validEntityBeforeSave(update);
return baseMapper.updateById(update) > 0;
}
/**
* 保存前的数据校验
*/
private void validEntityBeforeSave(KnowledgeAttachPic entity) {
//TODO 做一些数据校验,如唯一约束
}
/**
* 批量删除知识库附件图片列
*/
@Override
public Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid) {
if (isValid) {
//TODO 做一些业务上的校验,判断是否需要校验
}
return baseMapper.deleteBatchIds(ids) > 0;
}
}

View File

@@ -1,5 +1,7 @@
package org.ruoyi.service.impl;
import cn.hutool.core.util.ObjectUtil;
import java.util.stream.Collectors;
import org.ruoyi.common.core.utils.MapstructUtils;
import org.ruoyi.common.core.utils.StringUtils;
import org.ruoyi.core.page.TableDataInfo;
@@ -8,8 +10,11 @@ import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import lombok.RequiredArgsConstructor;
import org.ruoyi.domain.KnowledgeAttachPic;
import org.ruoyi.domain.vo.KnowledgeAttachVo;
import org.ruoyi.mapper.KnowledgeAttachPicMapper;
import org.ruoyi.mapper.KnowledgeFragmentMapper;
import org.ruoyi.system.service.ISysOssService;
import org.springframework.stereotype.Service;
import org.ruoyi.domain.bo.KnowledgeAttachBo;
@@ -33,99 +38,130 @@ import java.util.Collection;
@Service
public class KnowledgeAttachServiceImpl implements IKnowledgeAttachService {
private final KnowledgeAttachMapper baseMapper;
private final KnowledgeFragmentMapper fragmentMapper;
private final KnowledgeAttachMapper baseMapper;
private final KnowledgeFragmentMapper fragmentMapper;
/**
* 查询知识库附件
*/
@Override
public KnowledgeAttachVo queryById(Long id){
return baseMapper.selectVoById(id);
private final ISysOssService ossService;
private final KnowledgeAttachPicMapper picMapper;
/**
* 查询知识库附件
*/
@Override
public KnowledgeAttachVo queryById(Long id) {
return baseMapper.selectVoById(id);
}
/**
* 查询知识库附件列表
*/
@Override
public TableDataInfo<KnowledgeAttachVo> queryPageList(KnowledgeAttachBo bo, PageQuery pageQuery) {
LambdaQueryWrapper<KnowledgeAttach> lqw = buildQueryWrapper(bo);
Page<KnowledgeAttachVo> result = baseMapper.selectVoPage(pageQuery.build(), lqw);
return TableDataInfo.build(result);
}
/**
* 查询知识库附件列表
*/
@Override
public List<KnowledgeAttachVo> queryList(KnowledgeAttachBo bo) {
LambdaQueryWrapper<KnowledgeAttach> lqw = buildQueryWrapper(bo);
return baseMapper.selectVoList(lqw);
}
private LambdaQueryWrapper<KnowledgeAttach> buildQueryWrapper(KnowledgeAttachBo bo) {
Map<String, Object> params = bo.getParams();
LambdaQueryWrapper<KnowledgeAttach> lqw = Wrappers.lambdaQuery();
lqw.eq(StringUtils.isNotBlank(bo.getKid()), KnowledgeAttach::getKid, bo.getKid());
lqw.eq(StringUtils.isNotBlank(bo.getDocId()), KnowledgeAttach::getDocId, bo.getDocId());
lqw.like(StringUtils.isNotBlank(bo.getDocName()), KnowledgeAttach::getDocName, bo.getDocName());
lqw.eq(StringUtils.isNotBlank(bo.getDocType()), KnowledgeAttach::getDocType, bo.getDocType());
lqw.eq(StringUtils.isNotBlank(bo.getContent()), KnowledgeAttach::getContent, bo.getContent());
return lqw;
}
/**
* 新增知识库附件
*/
@Override
public Boolean insertByBo(KnowledgeAttachBo bo) {
KnowledgeAttach add = MapstructUtils.convert(bo, KnowledgeAttach.class);
validEntityBeforeSave(add);
boolean flag = baseMapper.insert(add) > 0;
if (flag) {
bo.setId(add.getId());
}
return flag;
}
/**
* 查询知识库附件列表
*/
@Override
public TableDataInfo<KnowledgeAttachVo> queryPageList(KnowledgeAttachBo bo, PageQuery pageQuery) {
LambdaQueryWrapper<KnowledgeAttach> lqw = buildQueryWrapper(bo);
Page<KnowledgeAttachVo> result = baseMapper.selectVoPage(pageQuery.build(), lqw);
return TableDataInfo.build(result);
/**
* 修改知识库附件
*/
@Override
public Boolean updateByBo(KnowledgeAttachBo bo) {
KnowledgeAttach update = MapstructUtils.convert(bo, KnowledgeAttach.class);
validEntityBeforeSave(update);
return baseMapper.updateById(update) > 0;
}
/**
* 保存前的数据校验
*/
private void validEntityBeforeSave(KnowledgeAttach entity) {
//TODO 做一些数据校验,如唯一约束
}
/**
* 批量删除知识库附件
*/
@Override
public Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid) {
if (isValid) {
//TODO 做一些业务上的校验,判断是否需要校验
}
return baseMapper.deleteBatchIds(ids) > 0;
}
/**
* 查询知识库附件列表
*/
@Override
public List<KnowledgeAttachVo> queryList(KnowledgeAttachBo bo) {
LambdaQueryWrapper<KnowledgeAttach> lqw = buildQueryWrapper(bo);
return baseMapper.selectVoList(lqw);
@Override
public void removeKnowledgeAttach(String docId) {
Map<String, Object> map = new HashMap<>();
map.put("doc_id", docId);
List<KnowledgeAttachVo> knowledgeAttachVos = baseMapper.selectVoByMap(map);
if (ObjectUtil.isNotEmpty(knowledgeAttachVos)) {
Collection<Long> ossIds = knowledgeAttachVos.stream()
.map(KnowledgeAttachVo::getOssId)
.collect(Collectors.toList());
//删除oss
ossService.deleteWithValidByIds(ossIds, false);
//删除图片oss
List<KnowledgeAttachPic> knowledgeAttachPics = picMapper.selectList(
new LambdaQueryWrapper<KnowledgeAttachPic>()
.in(KnowledgeAttachPic::getKid,
knowledgeAttachVos.stream().map(KnowledgeAttachVo::getKid)
.collect(Collectors.toList()))
.in(KnowledgeAttachPic::getAid,
knowledgeAttachVos.stream().map(KnowledgeAttachVo::getId)
.collect(Collectors.toList()))
);
if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) {
Collection<Long> tossIds = knowledgeAttachPics.stream()
.map(KnowledgeAttachPic::getOssId)
.collect(Collectors.toList());
ossService.deleteWithValidByIds(tossIds, false);
List<Long> collect = knowledgeAttachPics.stream().map(KnowledgeAttachPic::getId)
.collect(Collectors.toList());
picMapper.deleteByIds(collect);
}
}
baseMapper.deleteByMap(map);
fragmentMapper.deleteByMap(map);
}
private LambdaQueryWrapper<KnowledgeAttach> buildQueryWrapper(KnowledgeAttachBo bo) {
Map<String, Object> params = bo.getParams();
LambdaQueryWrapper<KnowledgeAttach> lqw = Wrappers.lambdaQuery();
lqw.eq(StringUtils.isNotBlank(bo.getKid()), KnowledgeAttach::getKid, bo.getKid());
lqw.eq(StringUtils.isNotBlank(bo.getDocId()), KnowledgeAttach::getDocId, bo.getDocId());
lqw.like(StringUtils.isNotBlank(bo.getDocName()), KnowledgeAttach::getDocName, bo.getDocName());
lqw.eq(StringUtils.isNotBlank(bo.getDocType()), KnowledgeAttach::getDocType, bo.getDocType());
lqw.eq(StringUtils.isNotBlank(bo.getContent()), KnowledgeAttach::getContent, bo.getContent());
return lqw;
}
/**
* 新增知识库附件
*/
@Override
public Boolean insertByBo(KnowledgeAttachBo bo) {
KnowledgeAttach add = MapstructUtils.convert(bo, KnowledgeAttach.class);
validEntityBeforeSave(add);
boolean flag = baseMapper.insert(add) > 0;
if (flag) {
bo.setId(add.getId());
}
return flag;
}
/**
* 修改知识库附件
*/
@Override
public Boolean updateByBo(KnowledgeAttachBo bo) {
KnowledgeAttach update = MapstructUtils.convert(bo, KnowledgeAttach.class);
validEntityBeforeSave(update);
return baseMapper.updateById(update) > 0;
}
/**
* 保存前的数据校验
*/
private void validEntityBeforeSave(KnowledgeAttach entity){
//TODO 做一些数据校验,如唯一约束
}
/**
* 批量删除知识库附件
*/
@Override
public Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid) {
if(isValid){
//TODO 做一些业务上的校验,判断是否需要校验
}
return baseMapper.deleteBatchIds(ids) > 0;
}
@Override
public void removeKnowledgeAttach(String docId) {
Map<String,Object> map = new HashMap<>();
map.put("doc_id",docId);
baseMapper.deleteByMap(map);
fragmentMapper.deleteByMap(map);
}
@Override
public String translationByFile(MultipartFile file, String targetLanguage) {
@Override
public String translationByFile(MultipartFile file, String targetLanguage) {
/*String fileName = file.getOriginalFilename();
String docType = fileName.substring(fileName.lastIndexOf(".")+1);
String content = "";
@@ -173,6 +209,6 @@ public class KnowledgeAttachServiceImpl implements IKnowledgeAttachService {
throw new BaseException("调用大模型失败,请检查密钥是否正确!");
}
return chatCompletionResponse.getChoices().get(0).getMessage().getContent().toString();*/
return "接口开发中!";
}
return "接口开发中!";
}
}

View File

@@ -4,6 +4,8 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import okhttp3.MediaType;
import okhttp3.MultipartBody;
@@ -23,16 +25,19 @@ import org.springframework.web.multipart.MultipartFile;
/**
* PDF图片提取服务实现类
*/
@Service
//@Service
@Slf4j
public class PdfImageExtractServiceImpl implements PdfImageExtractService {
@Data
@AllArgsConstructor
//public class PdfImageExtractServiceImpl implements PdfImageExtractService {
public class PdfImageExtractServiceImpl {
@Value("${pdf.extract.service.url}")
// @Value("${pdf.extract.service.url}")
private String serviceUrl;
@Value("${pdf.extract.ai-api.url}")
// @Value("${pdf.extract.ai-api.url}")
private String aiApiUrl;
@Value("${pdf.extract.ai-api.key}")
private String aiApiKey ;
// @Value("${pdf.extract.ai-api.key}")
private String aiApiKey;
private final OkHttpClient client = new Builder()
.connectTimeout(100, TimeUnit.SECONDS)
@@ -43,7 +48,7 @@ public class PdfImageExtractServiceImpl implements PdfImageExtractService {
private static final MediaType JSON = MediaType.parse("application/json; charset=utf-8");
@Override
// @Override
public byte[] extractImages(MultipartFile pdfFile, String imageFormat, boolean allowDuplicates)
throws IOException {
// 构建multipart请求
@@ -77,7 +82,7 @@ public class PdfImageExtractServiceImpl implements PdfImageExtractService {
* @return 文件内容结果列表
* @throws IOException 如果API调用过程中发生错误
*/
@Override
// @Override
public List<PdfFileContentResult> dealFileContent(String[] unzip) throws IOException {
List<PdfFileContentResult> results = new ArrayList<>();
int i = 0;
@@ -110,6 +115,7 @@ public class PdfImageExtractServiceImpl implements PdfImageExtractService {
// 执行请求
try {
log.info("=============call=" + ++i);
Response response = client.newCall(request).execute();
log.info("=============response=" + response);
if (!response.isSuccessful()) {
@@ -126,11 +132,10 @@ public class PdfImageExtractServiceImpl implements PdfImageExtractService {
throw new RuntimeException(e);
}
}
return results;
}
@Override
// @Override
public List<PdfFileContentResult> extractImages(MultipartFile file) throws IOException {
String format = "png";
boolean allowDuplicates = true;

View File

@@ -97,7 +97,7 @@ public class VectorStoreServiceImpl implements VectorStoreService {
for (int i = 0; i < chunkList.size(); i++) {
Map<String, Object> dataSchema = new HashMap<>();
dataSchema.put("kid", storeEmbeddingBo.getKid());
dataSchema.put("docId", storeEmbeddingBo.getKid());
dataSchema.put("docId", storeEmbeddingBo.getDocId());
dataSchema.put("fid", storeEmbeddingBo.getFids().get(i));
Embedding embedding = embeddingModel.embed(chunkList.get(i)).content();
TextSegment segment = TextSegment.from(chunkList.get(i));

View File

@@ -11,6 +11,8 @@ import java.util.Base64;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.web.multipart.MultipartFile;
/**
* ZIP文件处理工具类
@@ -92,4 +94,90 @@ public class ZipUtils {
}
return base64Contents.toArray(new String[0]);
}
/**
* 解压ZIP文件并返回MultipartFile数组
*
* @param zipData ZIP文件的字节数组
* @return MultipartFile数组
* @throws IOException 如果解压过程中发生错误
*/
public static MultipartFile[] unzipToMultipartFiles(byte[] zipData) throws IOException {
List<MultipartFile> multipartFiles = new ArrayList<>();
try (ByteArrayInputStream bis = new ByteArrayInputStream(zipData);
ZipInputStream zis = new ZipInputStream(bis)) {
ZipEntry zipEntry;
while ((zipEntry = zis.getNextEntry()) != null) {
if (!zipEntry.isDirectory()) {
// 读取文件内容到内存
ByteArrayOutputStream baos = new ByteArrayOutputStream();
byte[] buffer = new byte[4096];
int read;
while ((read = zis.read(buffer)) != -1) {
baos.write(buffer, 0, read);
}
// 创建MultipartFile对象
String fileName = zipEntry.getName();
byte[] content = baos.toByteArray();
String contentType = determineContentType(fileName);
MultipartFile multipartFile = new MockMultipartFile(
fileName, // 文件名
fileName, // 原始文件名
contentType, // 内容类型
content // 文件内容
);
multipartFiles.add(multipartFile);
}
zis.closeEntry();
}
}
return multipartFiles.toArray(new MultipartFile[0]);
}
/**
* 根据文件名确定内容类型
*
* @param fileName 文件名
* @return 内容类型
*/
private static String determineContentType(String fileName) {
String extension = "";
int i = fileName.lastIndexOf('.');
if (i > 0) {
extension = fileName.substring(i + 1).toLowerCase();
}
switch (extension) {
case "txt":
return "text/plain";
case "html":
case "htm":
return "text/html";
case "pdf":
return "application/pdf";
case "jpg":
case "jpeg":
return "image/jpeg";
case "png":
return "image/png";
case "gif":
return "image/gif";
case "doc":
case "docx":
return "application/msword";
case "xls":
case "xlsx":
return "application/vnd.ms-excel";
case "xml":
return "application/xml";
case "json":
return "application/json";
default:
return "application/octet-stream";
}
}
}

View File

@@ -17,5 +17,11 @@
<maven.compiler.target>17</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-test</artifactId>
</dependency>
</dependencies>
</project>

View File

@@ -1,5 +1,6 @@
package org.ruoyi.system.service;
import org.ruoyi.common.log.event.LogininforEvent;
import org.ruoyi.core.page.PageQuery;
import org.ruoyi.core.page.TableDataInfo;
import org.ruoyi.system.domain.bo.SysLogininforBo;
@@ -44,4 +45,6 @@ public interface ISysLogininforService {
* 清空系统登录日志
*/
void cleanLogininfor();
void recordLogininfor(LogininforEvent logininforEvent);
}

View File

@@ -1,5 +1,6 @@
package org.ruoyi.system.service;
import org.ruoyi.common.log.event.OperLogEvent;
import org.ruoyi.core.page.PageQuery;
import org.ruoyi.core.page.TableDataInfo;
import org.ruoyi.system.domain.bo.SysOperLogBo;
@@ -51,4 +52,5 @@ public interface ISysOperLogService {
* 清空操作日志
*/
void cleanOperLog();
void recordOper(OperLogEvent operLogEvent);
}

View File

@@ -18,16 +18,20 @@ import java.util.List;
*/
public interface ISysOssService {
TableDataInfo<SysOssVo> queryPageList(SysOssBo sysOss, PageQuery pageQuery);
TableDataInfo<SysOssVo> queryPageList(SysOssBo sysOss, PageQuery pageQuery);
List<SysOssVo> listByIds(Collection<Long> ossIds);
List<SysOssVo> listByIds(Collection<Long> ossIds);
SysOssVo getById(Long ossId);
SysOssVo getById(Long ossId);
SysOssVo upload(MultipartFile file);
SysOssVo upload(MultipartFile file);
void download(Long ossId, HttpServletResponse response) throws IOException;
void download(Long ossId, HttpServletResponse response) throws IOException;
Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid);
MultipartFile downloadByFile(Long ossId) throws IOException;
String downloadByByte(Long ossId) throws IOException;
Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid);
}

View File

@@ -48,6 +48,7 @@ public class SysLogininforServiceImpl implements ISysLogininforService {
*/
@Async
@EventListener
@Override
public void recordLogininfor(LogininforEvent logininforEvent) {
HttpServletRequest request = logininforEvent.getRequest();
final UserAgent userAgent = UserAgentUtil.parse(request.getHeader("User-Agent"));

View File

@@ -42,6 +42,7 @@ public class SysOperLogServiceImpl implements ISysOperLogService {
*/
@Async
@EventListener
@Override
public void recordOper(OperLogEvent operLogEvent) {
SysOperLogBo operLog = MapstructUtils.convert(operLogEvent, SysOperLogBo.class);
// 远程查询操作地点

View File

@@ -7,6 +7,7 @@ import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import jakarta.servlet.http.HttpServletResponse;
import java.util.Base64;
import lombok.RequiredArgsConstructor;
import org.ruoyi.common.core.constant.CacheNames;
import org.ruoyi.common.core.exception.ServiceException;
@@ -29,6 +30,7 @@ import org.ruoyi.system.mapper.SysOssMapper;
import org.ruoyi.system.service.ISysOssService;
import org.springframework.cache.annotation.Cacheable;
import org.springframework.http.MediaType;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
@@ -48,127 +50,167 @@ import java.util.Map;
@Service
public class SysOssServiceImpl implements ISysOssService, OssService {
private final SysOssMapper baseMapper;
private final SysOssMapper baseMapper;
@Override
public TableDataInfo<SysOssVo> queryPageList(SysOssBo bo, PageQuery pageQuery) {
LambdaQueryWrapper<SysOss> lqw = buildQueryWrapper(bo);
Page<SysOssVo> result = baseMapper.selectVoPage(pageQuery.build(), lqw);
List<SysOssVo> filterResult = StreamUtils.toList(result.getRecords(), this::matchingUrl);
result.setRecords(filterResult);
return TableDataInfo.build(result);
@Override
public TableDataInfo<SysOssVo> queryPageList(SysOssBo bo, PageQuery pageQuery) {
LambdaQueryWrapper<SysOss> lqw = buildQueryWrapper(bo);
Page<SysOssVo> result = baseMapper.selectVoPage(pageQuery.build(), lqw);
List<SysOssVo> filterResult = StreamUtils.toList(result.getRecords(), this::matchingUrl);
result.setRecords(filterResult);
return TableDataInfo.build(result);
}
@Override
public List<SysOssVo> listByIds(Collection<Long> ossIds) {
List<SysOssVo> list = new ArrayList<>();
for (Long id : ossIds) {
SysOssVo vo = SpringUtils.getAopProxy(this).getById(id);
if (ObjectUtil.isNotNull(vo)) {
list.add(this.matchingUrl(vo));
}
}
return list;
}
@Override
public String selectUrlByIds(String ossIds) {
List<String> list = new ArrayList<>();
for (Long id : StringUtils.splitTo(ossIds, Convert::toLong)) {
SysOssVo vo = SpringUtils.getAopProxy(this).getById(id);
if (ObjectUtil.isNotNull(vo)) {
list.add(this.matchingUrl(vo).getUrl());
}
}
return String.join(StringUtils.SEPARATOR, list);
}
private LambdaQueryWrapper<SysOss> buildQueryWrapper(SysOssBo bo) {
Map<String, Object> params = bo.getParams();
LambdaQueryWrapper<SysOss> lqw = Wrappers.lambdaQuery();
lqw.like(StringUtils.isNotBlank(bo.getFileName()), SysOss::getFileName, bo.getFileName());
lqw.like(StringUtils.isNotBlank(bo.getOriginalName()), SysOss::getOriginalName,
bo.getOriginalName());
lqw.eq(StringUtils.isNotBlank(bo.getFileSuffix()), SysOss::getFileSuffix, bo.getFileSuffix());
lqw.eq(StringUtils.isNotBlank(bo.getUrl()), SysOss::getUrl, bo.getUrl());
lqw.between(params.get("beginCreateTime") != null && params.get("endCreateTime") != null,
SysOss::getCreateTime, params.get("beginCreateTime"), params.get("endCreateTime"));
lqw.eq(ObjectUtil.isNotNull(bo.getCreateBy()), SysOss::getCreateBy, bo.getCreateBy());
lqw.eq(StringUtils.isNotBlank(bo.getService()), SysOss::getService, bo.getService());
return lqw;
}
@Cacheable(cacheNames = CacheNames.SYS_OSS, key = "#ossId")
@Override
public SysOssVo getById(Long ossId) {
return baseMapper.selectVoById(ossId);
}
@Override
public void download(Long ossId, HttpServletResponse response) throws IOException {
SysOssVo sysOss = SpringUtils.getAopProxy(this).getById(ossId);
if (ObjectUtil.isNull(sysOss)) {
throw new ServiceException("文件数据不存在!");
}
FileUtils.setAttachmentResponseHeader(response, sysOss.getOriginalName());
response.setContentType(MediaType.APPLICATION_OCTET_STREAM_VALUE + "; charset=UTF-8");
OssClient storage = OssFactory.instance();
try (InputStream inputStream = storage.getObjectContent(sysOss.getUrl())) {
int available = inputStream.available();
IoUtil.copy(inputStream, response.getOutputStream(), available);
response.setContentLength(available);
} catch (Exception e) {
throw new ServiceException(e.getMessage());
}
}
@Override
public String downloadByByte(Long ossId) throws IOException {
SysOssVo sysOss = SpringUtils.getAopProxy(this).getById(ossId);
if (ObjectUtil.isNull(sysOss)) {
throw new ServiceException("文件数据不存在!");
}
@Override
public List<SysOssVo> listByIds(Collection<Long> ossIds) {
List<SysOssVo> list = new ArrayList<>();
for (Long id : ossIds) {
SysOssVo vo = SpringUtils.getAopProxy(this).getById(id);
if (ObjectUtil.isNotNull(vo)) {
list.add(this.matchingUrl(vo));
}
}
return list;
OssClient storage = OssFactory.instance();
try (InputStream inputStream = storage.getObjectContent(sysOss.getUrl())) {
// 读取输入流中的所有字节
byte[] bytes = IoUtil.readBytes(inputStream);
// 将字节数组转换为Base64编码的字符串
return Base64.getEncoder().encodeToString(bytes);
} catch (Exception e) {
throw new ServiceException(e.getMessage());
}
}
@Override
public MultipartFile downloadByFile(Long ossId) throws IOException {
SysOssVo sysOss = SpringUtils.getAopProxy(this).getById(ossId);
if (ObjectUtil.isNull(sysOss)) {
throw new ServiceException("文件数据不存在!");
}
@Override
public String selectUrlByIds(String ossIds) {
List<String> list = new ArrayList<>();
for (Long id : StringUtils.splitTo(ossIds, Convert::toLong)) {
SysOssVo vo = SpringUtils.getAopProxy(this).getById(id);
if (ObjectUtil.isNotNull(vo)) {
list.add(this.matchingUrl(vo).getUrl());
}
}
return String.join(StringUtils.SEPARATOR, list);
OssClient storage = OssFactory.instance();
try (InputStream inputStream = storage.getObjectContent(sysOss.getUrl())) {
byte[] content = IoUtil.readBytes(inputStream);
return new MockMultipartFile(
sysOss.getFileName(),
sysOss.getOriginalName(),
MediaType.APPLICATION_OCTET_STREAM_VALUE,
content
);
} catch (Exception e) {
throw new ServiceException(e.getMessage());
}
}
private LambdaQueryWrapper<SysOss> buildQueryWrapper(SysOssBo bo) {
Map<String, Object> params = bo.getParams();
LambdaQueryWrapper<SysOss> lqw = Wrappers.lambdaQuery();
lqw.like(StringUtils.isNotBlank(bo.getFileName()), SysOss::getFileName, bo.getFileName());
lqw.like(StringUtils.isNotBlank(bo.getOriginalName()), SysOss::getOriginalName, bo.getOriginalName());
lqw.eq(StringUtils.isNotBlank(bo.getFileSuffix()), SysOss::getFileSuffix, bo.getFileSuffix());
lqw.eq(StringUtils.isNotBlank(bo.getUrl()), SysOss::getUrl, bo.getUrl());
lqw.between(params.get("beginCreateTime") != null && params.get("endCreateTime") != null,
SysOss::getCreateTime, params.get("beginCreateTime"), params.get("endCreateTime"));
lqw.eq(ObjectUtil.isNotNull(bo.getCreateBy()), SysOss::getCreateBy, bo.getCreateBy());
lqw.eq(StringUtils.isNotBlank(bo.getService()), SysOss::getService, bo.getService());
return lqw;
@Override
public SysOssVo upload(MultipartFile file) {
String originalfileName = file.getOriginalFilename();
String suffix = StringUtils.substring(originalfileName, originalfileName.lastIndexOf("."),
originalfileName.length());
OssClient storage = OssFactory.instance();
UploadResult uploadResult;
try {
uploadResult = storage.uploadSuffix(file.getBytes(), suffix, file.getContentType());
} catch (IOException e) {
throw new ServiceException(e.getMessage());
}
// 保存文件信息
SysOss oss = new SysOss();
oss.setUrl(uploadResult.getUrl());
oss.setFileSuffix(suffix);
oss.setFileName(uploadResult.getFilename());
oss.setOriginalName(originalfileName);
oss.setService(storage.getConfigKey());
baseMapper.insert(oss);
SysOssVo sysOssVo = MapstructUtils.convert(oss, SysOssVo.class);
return this.matchingUrl(sysOssVo);
}
@Cacheable(cacheNames = CacheNames.SYS_OSS, key = "#ossId")
@Override
public SysOssVo getById(Long ossId) {
return baseMapper.selectVoById(ossId);
@Override
public Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid) {
if (isValid) {
// 做一些业务上的校验,判断是否需要校验
}
List<SysOss> list = baseMapper.selectBatchIds(ids);
for (SysOss sysOss : list) {
OssClient storage = OssFactory.instance(sysOss.getService());
storage.delete(sysOss.getUrl());
}
return baseMapper.deleteBatchIds(ids) > 0;
}
@Override
public void download(Long ossId, HttpServletResponse response) throws IOException {
SysOssVo sysOss = SpringUtils.getAopProxy(this).getById(ossId);
if (ObjectUtil.isNull(sysOss)) {
throw new ServiceException("文件数据不存在!");
}
FileUtils.setAttachmentResponseHeader(response, sysOss.getOriginalName());
response.setContentType(MediaType.APPLICATION_OCTET_STREAM_VALUE + "; charset=UTF-8");
OssClient storage = OssFactory.instance();
try(InputStream inputStream = storage.getObjectContent(sysOss.getUrl())) {
int available = inputStream.available();
IoUtil.copy(inputStream, response.getOutputStream(), available);
response.setContentLength(available);
} catch (Exception e) {
throw new ServiceException(e.getMessage());
}
}
@Override
public SysOssVo upload(MultipartFile file) {
String originalfileName = file.getOriginalFilename();
String suffix = StringUtils.substring(originalfileName, originalfileName.lastIndexOf("."), originalfileName.length());
OssClient storage = OssFactory.instance();
UploadResult uploadResult;
try {
uploadResult = storage.uploadSuffix(file.getBytes(), suffix, file.getContentType());
} catch (IOException e) {
throw new ServiceException(e.getMessage());
}
// 保存文件信息
SysOss oss = new SysOss();
oss.setUrl(uploadResult.getUrl());
oss.setFileSuffix(suffix);
oss.setFileName(uploadResult.getFilename());
oss.setOriginalName(originalfileName);
oss.setService(storage.getConfigKey());
baseMapper.insert(oss);
SysOssVo sysOssVo = MapstructUtils.convert(oss, SysOssVo.class);
return this.matchingUrl(sysOssVo);
}
@Override
public Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid) {
if (isValid) {
// 做一些业务上的校验,判断是否需要校验
}
List<SysOss> list = baseMapper.selectBatchIds(ids);
for (SysOss sysOss : list) {
OssClient storage = OssFactory.instance(sysOss.getService());
storage.delete(sysOss.getUrl());
}
return baseMapper.deleteBatchIds(ids) > 0;
}
/**
* 匹配Url
*
* @param oss OSS对象
* @return oss 匹配Url的OSS对象
*/
private SysOssVo matchingUrl(SysOssVo oss) {
OssClient storage = OssFactory.instance(oss.getService());
// 仅修改桶类型为 private 的URL临时URL时长为120s
if (AccessPolicyType.PRIVATE == storage.getAccessPolicy()) {
oss.setUrl(storage.getPrivateUrl(oss.getFileName(), 120));
}
return oss;
/**
* 匹配Url
*
* @param oss OSS对象
* @return oss 匹配Url的OSS对象
*/
private SysOssVo matchingUrl(SysOssVo oss) {
OssClient storage = OssFactory.instance(oss.getService());
// 仅修改桶类型为 private 的URL临时URL时长为120s
if (AccessPolicyType.PRIVATE == storage.getAccessPolicy()) {
oss.setUrl(storage.getPrivateUrl(oss.getFileName(), 120));
}
return oss;
}
}

View File

@@ -52,7 +52,7 @@ public class KnowledgeController extends BaseController {
private final IKnowledgeFragmentService fragmentService;
private final PdfImageExtractService pdfImageExtractService;
// private final PdfImageExtractService pdfImageExtractService;
/**
* 根据用户信息查询本地知识库
@@ -170,11 +170,11 @@ public class KnowledgeController extends BaseController {
* @param file PDF文件
* @return 文件名称和图片内容
*/
@PostMapping("/extract-images")
@Operation(summary = "提取PDF中的图片并调用大模型,识别图片内容并返回", description = "提取PDF中的图片并调用gpt-4o-mini,识别图片内容并返回")
public R<List<PdfFileContentResult>> extractImages(
@RequestPart("file") MultipartFile file
) throws IOException {
return R.ok(pdfImageExtractService.extractImages(file));
}
// @PostMapping("/extract-images")
// @Operation(summary = "提取PDF中的图片并调用大模型,识别图片内容并返回", description = "提取PDF中的图片并调用gpt-4o-mini,识别图片内容并返回")
// public R<List<PdfFileContentResult>> extractImages(
// @RequestPart("file") MultipartFile file
// ) throws IOException {
// return R.ok(pdfImageExtractService.extractImages(file));
// }
}

View File

@@ -0,0 +1,390 @@
package org.ruoyi.chat.service.knowledge;
import cn.hutool.core.util.ObjectUtil;
import cn.hutool.core.util.RandomUtil;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper;
import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import java.util.Collection;
import java.util.Date;
import java.util.List;
import java.util.stream.Collectors;
import lombok.RequiredArgsConstructor;
import org.ruoyi.chain.loader.ResourceLoaderFactory;
import org.ruoyi.constant.DealStatus;
import org.ruoyi.domain.KnowledgeAttach;
import org.ruoyi.domain.KnowledgeAttachPic;
import org.ruoyi.domain.KnowledgeFragment;
import org.ruoyi.domain.KnowledgeInfo;
import org.ruoyi.domain.PdfFileContentResult;
import org.ruoyi.domain.bo.StoreEmbeddingBo;
import org.ruoyi.domain.vo.ChatModelVo;
import org.ruoyi.domain.vo.KnowledgeAttachVo;
import org.ruoyi.domain.vo.KnowledgeInfoVo;
import org.ruoyi.mapper.KnowledgeAttachMapper;
import org.ruoyi.mapper.KnowledgeAttachPicMapper;
import org.ruoyi.mapper.KnowledgeFragmentMapper;
import org.ruoyi.mapper.KnowledgeInfoMapper;
import org.ruoyi.service.IChatModelService;
import org.ruoyi.service.VectorStoreService;
import org.ruoyi.service.impl.PdfImageExtractServiceImpl;
import org.ruoyi.system.domain.vo.SysOssVo;
import org.ruoyi.system.service.ISysOssService;
import org.ruoyi.utils.ZipUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
/**
* @Description:
* @Date: 2025/5/15 下午4:29
*/
@Service
@RequiredArgsConstructor
public class DealFileService {
private static final Logger log = LoggerFactory.getLogger(DealFileService.class);
private final KnowledgeInfoMapper baseMapper;
private final VectorStoreService vectorStoreService;
private final ResourceLoaderFactory resourceLoaderFactory;
private final KnowledgeFragmentMapper fragmentMapper;
private final KnowledgeAttachMapper attachMapper;
private final IChatModelService chatModelService;
private final ISysOssService ossService;
// private final PdfImageExtractService pdfImageExtractService;
private final KnowledgeAttachPicMapper picMapper;
@Value("${pdf.extract.service.url}")
private String serviceUrl;
@Value("${pdf.extract.ai-api.url}")
private String aiApiUrl;
@Value("${pdf.extract.ai-api.key}")
private String aiApiKey;
@Async
public void dealVectorStatus(KnowledgeAttach attachItem) throws Exception {
try {
//锁定数据 更改VectorStatus 到进行中
if (attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
.set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_20)
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getId, attachItem.getId())
) == 0) {
return;
}
List<KnowledgeFragment> knowledgeFragments = fragmentMapper.selectList(
new LambdaQueryWrapper<KnowledgeFragment>()
.eq(KnowledgeFragment::getKid, attachItem.getKid())
.eq(KnowledgeFragment::getDocId, attachItem.getDocId())
);
if (ObjectUtil.isEmpty(knowledgeFragments)) {
throw new Exception("文件段落为空");
}
List<String> fids = knowledgeFragments.stream()
.map(KnowledgeFragment::getFid)
.collect(Collectors.toList());
if (ObjectUtil.isEmpty(fids)) {
throw new Exception("fids 为空");
}
List<String> chunkList = knowledgeFragments.stream()
.map(KnowledgeFragment::getContent)
.collect(Collectors.toList());
if (ObjectUtil.isEmpty(chunkList)) {
throw new Exception("chunkList 为空");
}
// 通过kid查询知识库信息
KnowledgeInfoVo knowledgeInfoVo = baseMapper.selectVoOne(Wrappers.<KnowledgeInfo>lambdaQuery()
.eq(KnowledgeInfo::getId, attachItem.getKid()));
// 通过向量模型查询模型信息
ChatModelVo chatModelVo = chatModelService.selectModelByName(
knowledgeInfoVo.getEmbeddingModelName());
StoreEmbeddingBo storeEmbeddingBo = new StoreEmbeddingBo();
storeEmbeddingBo.setKid(attachItem.getKid());
storeEmbeddingBo.setDocId(attachItem.getDocId());
storeEmbeddingBo.setFids(fids);
storeEmbeddingBo.setChunkList(chunkList);
storeEmbeddingBo.setVectorModelName(knowledgeInfoVo.getVectorModelName());
storeEmbeddingBo.setEmbeddingModelName(knowledgeInfoVo.getEmbeddingModelName());
storeEmbeddingBo.setApiKey(chatModelVo.getApiKey());
storeEmbeddingBo.setBaseUrl(chatModelVo.getApiHost());
vectorStoreService.storeEmbeddings(storeEmbeddingBo);
//设置处理完成
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
.set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_20)
.eq(KnowledgeAttach::getId, attachItem.getId()));
} catch (Exception e) {
//设置处理失败
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
.set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_40)
.set(KnowledgeAttach::getRemark, attachItem.getRemark() + e.getMessage())
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_20)
.eq(KnowledgeAttach::getId, attachItem.getId()));
throw new RuntimeException(e);
}
}
@Async
public void dealPicStatus(KnowledgeAttach attachItem) throws Exception {
try {
//锁定数据 更改picStatus 到进行中
if (attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
.set(KnowledgeAttach::getPicStatus, DealStatus.STATUS_20)
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getId, attachItem.getId())
) == 0) {
return;
}
//获取附件
if (ObjectUtil.isEmpty(attachItem.getOssId())) {
log.error("==========OssId 为空attachItem={}", attachItem);
throw new Exception("OssId 为空");
}
//获取oss文件
MultipartFile multipartFile = ossService.downloadByFile(attachItem.getOssId());
//拆解出图片ZIP
PdfImageExtractServiceImpl pdfImageExtractService = new PdfImageExtractServiceImpl(serviceUrl,
aiApiUrl, aiApiKey);
byte[] pngs = pdfImageExtractService.extractImages(multipartFile, "png", true);
//解压zip得到图片文件
MultipartFile[] multipartFiles = ZipUtils.unzipToMultipartFiles(pngs);
//上传文件到OSS写入表
for (MultipartFile file : multipartFiles) {
//先查找是否有相同图片名称,先做删除
List<KnowledgeAttachPic> knowledgeAttachPics = picMapper.selectList(
new LambdaQueryWrapper<KnowledgeAttachPic>()
.eq(KnowledgeAttachPic::getKid, attachItem.getKid())
.eq(KnowledgeAttachPic::getAid, attachItem.getId())
.eq(KnowledgeAttachPic::getDocName, file.getOriginalFilename())
);
if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) {
Collection<Long> ossIds = knowledgeAttachPics.stream()
.map(KnowledgeAttachPic::getOssId)
.collect(Collectors.toList());
ossService.deleteWithValidByIds(ossIds, false);
List<Long> collect = knowledgeAttachPics.stream().map(KnowledgeAttachPic::getId)
.collect(Collectors.toList());
picMapper.deleteByIds(collect);
}
SysOssVo upload = ossService.upload(file);
KnowledgeAttachPic entity = new KnowledgeAttachPic();
entity.setKid(attachItem.getKid());
entity.setAid(String.valueOf(attachItem.getId()));
entity.setDocName(file.getOriginalFilename());
entity.setDocType(
file.getOriginalFilename().substring(file.getOriginalFilename().lastIndexOf(".") + 1));
entity.setOssId(upload.getOssId());
int[] ints = extractPageNumbers(file.getOriginalFilename());
if (ObjectUtil.isNotEmpty(ints)) {
assert ints != null;
if (ints.length == 2) {
entity.setPageNum(ints[0]);
entity.setIndexNum(ints[1]);
}
}
picMapper.insert(entity);
}
//设置处理完成
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
.set(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_20)
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getId, attachItem.getId()));
} catch (Exception e) {
//设置处理失败
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
.set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_40)
.set(KnowledgeAttach::getRemark, attachItem.getRemark() + e.getMessage())
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_20)
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getId, attachItem.getId()));
throw new RuntimeException(e);
}
}
@Async
public void dealPicAnysStatus(KnowledgeAttachPic picItem) throws Exception {
try {
//锁定数据 更改 getPicAnysStatus 到进行中
if (picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>()
.set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_20)
.eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttachPic::getId, picItem.getId())
) == 0) {
return;
}
SysOssVo ossVo = ossService.getById(picItem.getOssId());
if (ObjectUtil.isNotEmpty(ossVo)) {
String fileStr = ossService.downloadByByte(picItem.getOssId());
//调用第三方 分析图片内容
PdfImageExtractServiceImpl pdfImageExtractService = new PdfImageExtractServiceImpl(
serviceUrl,
aiApiUrl, aiApiKey);
List<PdfFileContentResult> pdfFileContentResults = pdfImageExtractService.dealFileContent(
new String[]{fileStr});
if (ObjectUtil.isNotEmpty(pdfFileContentResults)) {
for (PdfFileContentResult resultItem : pdfFileContentResults) {
//图片解析内容回写到pic表
picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>()
.set(KnowledgeAttachPic::getContent, parseContent(resultItem.getContent()))
.set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttachPic::getId, picItem.getId()));
//将图片解析内容 写入段落表 fragment
KnowledgeAttachVo knowledgeAttachVo = attachMapper.selectVoById(picItem.getAid());
if (ObjectUtil.isNotEmpty(knowledgeAttachVo)) {
String fid = RandomUtil.randomString(10);
KnowledgeFragment knowledgeFragment = new KnowledgeFragment();
knowledgeFragment.setKid(knowledgeAttachVo.getKid());
knowledgeFragment.setDocId(knowledgeAttachVo.getDocId());
knowledgeFragment.setFid(fid);
knowledgeFragment.setIdx(0);
knowledgeFragment.setContent(parseContent(resultItem.getContent()));
knowledgeFragment.setCreateTime(new Date());
fragmentMapper.insert(knowledgeFragment);
//更新attach表需要所有图片都处理完毕
// 查询非30状态完成状态的记录数量
long nonStatus30Count = picMapper.selectCount(
new LambdaQueryWrapper<KnowledgeAttachPic>()
.ne(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttachPic::getAid, picItem.getAid())
);
if (nonStatus30Count == 0) {
// 执行表更新操作
attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
.set(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getId, picItem.getAid()));
}
}
}
}
}
} catch (Exception e) {
//失败
picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>()
.set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_40)
.set(KnowledgeAttachPic::getRemark, picItem.getRemark() + e.getMessage())
.eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_20)
.eq(KnowledgeAttachPic::getId, picItem.getId()));
throw new RuntimeException(e);
} finally {
//将图片分析失败的数据 重新设置状态
picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>()
.set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_40));
}
}
/**
* 从文件名中提取page后面的两个数字
*
* @param fileName 文件名
* @return 包含两个数字的数组如果未找到则返回null
*/
public static int[] extractPageNumbers(String fileName) {
// 查找"page_"的位置
int pageIndex = fileName.indexOf("page_");
if (pageIndex == -1) {
return null;
}
// 从"page_"后开始截取
String afterPage = fileName.substring(pageIndex + 5);
// 按下划线分割
String[] parts = afterPage.split("_");
if (parts.length >= 2) {
try {
// 提取两个数字
int firstNumber = Integer.parseInt(parts[0]);
// 对于第二个数字,需要去掉可能的文件扩展名
String secondPart = parts[1];
int dotIndex = secondPart.indexOf(".");
if (dotIndex != -1) {
secondPart = secondPart.substring(0, dotIndex);
}
int secondNumber = Integer.parseInt(secondPart);
return new int[]{firstNumber, secondNumber};
} catch (NumberFormatException e) {
return null;
}
}
return null;
}
public static String parseContent(String jsonString) {
try {
// 创建ObjectMapper实例
ObjectMapper objectMapper = new ObjectMapper();
// 解析JSON字符串
JsonNode rootNode = objectMapper.readTree(jsonString);
// 获取choices数组的第一个元素
JsonNode choicesNode = rootNode.get("choices");
if (choicesNode != null && choicesNode.isArray() && choicesNode.size() > 0) {
// 获取第一个choice
JsonNode firstChoice = choicesNode.get(0);
// 获取message节点
JsonNode messageNode = firstChoice.get("message");
if (messageNode != null) {
// 获取content字段的值
JsonNode contentNode = messageNode.get("content");
if (contentNode != null) {
return contentNode.asText();
}
}
}
return "无法找到content内容";
} catch (Exception e) {
e.printStackTrace();
return "解析JSON时发生错误: " + e.getMessage();
}
}
}

View File

@@ -1,44 +1,63 @@
package org.ruoyi.chat.service.knowledge;
import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.util.ObjectUtil;
import cn.hutool.core.util.RandomUtil;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper;
import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import java.util.stream.Collectors;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.ruoyi.chain.loader.ResourceLoader;
import org.ruoyi.chain.loader.ResourceLoaderFactory;
import org.ruoyi.common.core.domain.model.LoginUser;
import org.ruoyi.common.core.utils.MapstructUtils;
import org.ruoyi.common.core.utils.StringUtils;
import org.ruoyi.common.satoken.utils.LoginHelper;
import org.ruoyi.constant.DealStatus;
import org.ruoyi.constant.FileType;
import org.ruoyi.core.page.PageQuery;
import org.ruoyi.core.page.TableDataInfo;
import org.ruoyi.domain.ChatModel;
import org.ruoyi.domain.KnowledgeAttach;
import org.ruoyi.domain.KnowledgeAttachPic;
import org.ruoyi.domain.KnowledgeFragment;
import org.ruoyi.domain.KnowledgeInfo;
import org.ruoyi.domain.PdfFileContentResult;
import org.ruoyi.domain.bo.KnowledgeInfoBo;
import org.ruoyi.domain.bo.KnowledgeInfoUploadBo;
import org.ruoyi.domain.bo.StoreEmbeddingBo;
import org.ruoyi.domain.vo.ChatModelVo;
import org.ruoyi.domain.vo.KnowledgeAttachVo;
import org.ruoyi.domain.vo.KnowledgeInfoVo;
import org.ruoyi.mapper.KnowledgeAttachMapper;
import org.ruoyi.mapper.KnowledgeAttachPicMapper;
import org.ruoyi.mapper.KnowledgeFragmentMapper;
import org.ruoyi.mapper.KnowledgeInfoMapper;
import org.ruoyi.service.IChatModelService;
import org.ruoyi.service.PdfImageExtractService;
import org.ruoyi.service.VectorStoreService;
import org.ruoyi.service.IKnowledgeInfoService;
import org.ruoyi.service.impl.PdfImageExtractServiceImpl;
import org.ruoyi.system.domain.vo.SysOssVo;
import org.ruoyi.utils.ZipUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.annotation.Async;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.web.multipart.MultipartFile;
import org.ruoyi.system.service.ISysOssService;
import java.io.IOException;
import java.util.*;
/**
* 知识库Service业务层处理
*
@@ -49,216 +68,330 @@ import java.util.*;
@Service
public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
private static final Logger log = LoggerFactory.getLogger(KnowledgeInfoServiceImpl.class);
private final KnowledgeInfoMapper baseMapper;
private static final Logger log = LoggerFactory.getLogger(KnowledgeInfoServiceImpl.class);
private final KnowledgeInfoMapper baseMapper;
private final VectorStoreService vectorStoreService;
private final VectorStoreService vectorStoreService;
private final ResourceLoaderFactory resourceLoaderFactory;
private final ResourceLoaderFactory resourceLoaderFactory;
private final KnowledgeFragmentMapper fragmentMapper;
private final KnowledgeFragmentMapper fragmentMapper;
private final KnowledgeAttachMapper attachMapper;
private final KnowledgeAttachMapper attachMapper;
private final IChatModelService chatModelService;
private final IChatModelService chatModelService;
/**
* 查询知识库
*/
@Override
public KnowledgeInfoVo queryById(Long id){
return baseMapper.selectVoById(id);
private final ISysOssService ossService;
// private final PdfImageExtractService pdfImageExtractService;
private final KnowledgeAttachPicMapper picMapper;
private final DealFileService dealFileService;
@Value("${pdf.extract.service.url}")
private String serviceUrl;
@Value("${pdf.extract.ai-api.url}")
private String aiApiUrl;
@Value("${pdf.extract.ai-api.key}")
private String aiApiKey;
/**
* 查询知识库
*/
@Override
public KnowledgeInfoVo queryById(Long id) {
return baseMapper.selectVoById(id);
}
/**
* 查询知识库列表
*/
@Override
public TableDataInfo<KnowledgeInfoVo> queryPageList(KnowledgeInfoBo bo, PageQuery pageQuery) {
LambdaQueryWrapper<KnowledgeInfo> lqw = buildQueryWrapper(bo);
Page<KnowledgeInfoVo> result = baseMapper.selectVoPage(pageQuery.build(), lqw);
return TableDataInfo.build(result);
}
/**
* 查询知识库列表
*/
@Override
public List<KnowledgeInfoVo> queryList(KnowledgeInfoBo bo) {
LambdaQueryWrapper<KnowledgeInfo> lqw = buildQueryWrapper(bo);
return baseMapper.selectVoList(lqw);
}
private LambdaQueryWrapper<KnowledgeInfo> buildQueryWrapper(KnowledgeInfoBo bo) {
Map<String, Object> params = bo.getParams();
LambdaQueryWrapper<KnowledgeInfo> lqw = Wrappers.lambdaQuery();
lqw.eq(StringUtils.isNotBlank(bo.getKid()), KnowledgeInfo::getKid, bo.getKid());
lqw.eq(bo.getUid() != null, KnowledgeInfo::getUid, bo.getUid());
lqw.like(StringUtils.isNotBlank(bo.getKname()), KnowledgeInfo::getKname, bo.getKname());
lqw.eq(bo.getShare() != null, KnowledgeInfo::getShare, bo.getShare());
lqw.eq(StringUtils.isNotBlank(bo.getDescription()), KnowledgeInfo::getDescription,
bo.getDescription());
lqw.eq(StringUtils.isNotBlank(bo.getKnowledgeSeparator()), KnowledgeInfo::getKnowledgeSeparator,
bo.getKnowledgeSeparator());
lqw.eq(StringUtils.isNotBlank(bo.getQuestionSeparator()), KnowledgeInfo::getQuestionSeparator,
bo.getQuestionSeparator());
lqw.eq(bo.getOverlapChar() != null, KnowledgeInfo::getOverlapChar, bo.getOverlapChar());
lqw.eq(bo.getRetrieveLimit() != null, KnowledgeInfo::getRetrieveLimit, bo.getRetrieveLimit());
lqw.eq(bo.getTextBlockSize() != null, KnowledgeInfo::getTextBlockSize, bo.getTextBlockSize());
return lqw;
}
/**
* 新增知识库
*/
@Override
public Boolean insertByBo(KnowledgeInfoBo bo) {
KnowledgeInfo add = MapstructUtils.convert(bo, KnowledgeInfo.class);
validEntityBeforeSave(add);
boolean flag = baseMapper.insert(add) > 0;
if (flag) {
bo.setId(add.getId());
}
return flag;
}
/**
* 修改知识库
*/
@Override
public Boolean updateByBo(KnowledgeInfoBo bo) {
KnowledgeInfo update = MapstructUtils.convert(bo, KnowledgeInfo.class);
validEntityBeforeSave(update);
return baseMapper.updateById(update) > 0;
}
/**
* 保存前的数据校验
*/
private void validEntityBeforeSave(KnowledgeInfo entity) {
//TODO 做一些数据校验,如唯一约束
}
/**
* 批量删除知识库
*/
@Override
public Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid) {
if (isValid) {
//TODO 做一些业务上的校验,判断是否需要校验
}
return baseMapper.deleteBatchIds(ids) > 0;
}
@Override
@Transactional(rollbackFor = Exception.class)
public void saveOne(KnowledgeInfoBo bo) {
KnowledgeInfo knowledgeInfo = MapstructUtils.convert(bo, KnowledgeInfo.class);
if (StringUtils.isBlank(bo.getKid())) {
String kid = RandomUtil.randomString(10);
if (knowledgeInfo != null) {
knowledgeInfo.setKid(kid);
knowledgeInfo.setUid(LoginHelper.getLoginUser().getUserId());
}
baseMapper.insert(knowledgeInfo);
if (knowledgeInfo != null) {
vectorStoreService.createSchema(String.valueOf(knowledgeInfo.getId()),
bo.getVectorModelName());
}
} else {
baseMapper.updateById(knowledgeInfo);
}
}
@Override
@Transactional(rollbackFor = Exception.class)
public void removeKnowledge(String id) {
Map<String, Object> map = new HashMap<>();
map.put("kid", id);
List<KnowledgeInfoVo> knowledgeInfoList = baseMapper.selectVoByMap(map);
check(knowledgeInfoList);
// 删除向量库信息
knowledgeInfoList.forEach(knowledgeInfoVo -> {
vectorStoreService.removeByKid(String.valueOf(knowledgeInfoVo.getId()),
knowledgeInfoVo.getVectorModelName());
});
// 删除附件和知识片段
fragmentMapper.deleteByMap(map);
List<KnowledgeAttachVo> knowledgeAttachVos = attachMapper.selectVoByMap(map);
if (ObjectUtil.isNotEmpty(knowledgeAttachVos)) {
Collection<Long> ossIds = knowledgeAttachVos.stream()
.map(KnowledgeAttachVo::getOssId)
.collect(Collectors.toList());
//删除oss
ossService.deleteWithValidByIds(ossIds, false);
//删除图片oss
List<KnowledgeAttachPic> knowledgeAttachPics = picMapper.selectList(
new LambdaQueryWrapper<KnowledgeAttachPic>()
.in(KnowledgeAttachPic::getKid,
knowledgeAttachVos.stream().map(KnowledgeAttachVo::getKid)
.collect(Collectors.toList()))
.in(KnowledgeAttachPic::getAid,
knowledgeAttachVos.stream().map(KnowledgeAttachVo::getId)
.collect(Collectors.toList()))
);
if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) {
Collection<Long> tossIds = knowledgeAttachPics.stream()
.map(KnowledgeAttachPic::getOssId)
.collect(Collectors.toList());
ossService.deleteWithValidByIds(tossIds, false);
List<Long> collect = knowledgeAttachPics.stream().map(KnowledgeAttachPic::getId)
.collect(Collectors.toList());
picMapper.deleteByIds(collect);
}
}
attachMapper.deleteByMap(map);
// 删除知识库
baseMapper.deleteByMap(map);
}
@Override
public void upload(KnowledgeInfoUploadBo bo) {
storeContent(bo.getFile(), bo.getKid());
}
public void storeContent(MultipartFile file, String kid) {
if (file == null || file.isEmpty()) {
throw new IllegalArgumentException("File cannot be null or empty");
}
/**
* 查询知识库列表
*/
@Override
public TableDataInfo<KnowledgeInfoVo> queryPageList(KnowledgeInfoBo bo, PageQuery pageQuery) {
LambdaQueryWrapper<KnowledgeInfo> lqw = buildQueryWrapper(bo);
Page<KnowledgeInfoVo> result = baseMapper.selectVoPage(pageQuery.build(), lqw);
return TableDataInfo.build(result);
}
/**
* 查询知识库列表
*/
@Override
public List<KnowledgeInfoVo> queryList(KnowledgeInfoBo bo) {
LambdaQueryWrapper<KnowledgeInfo> lqw = buildQueryWrapper(bo);
return baseMapper.selectVoList(lqw);
}
private LambdaQueryWrapper<KnowledgeInfo> buildQueryWrapper(KnowledgeInfoBo bo) {
Map<String, Object> params = bo.getParams();
LambdaQueryWrapper<KnowledgeInfo> lqw = Wrappers.lambdaQuery();
lqw.eq(StringUtils.isNotBlank(bo.getKid()), KnowledgeInfo::getKid, bo.getKid());
lqw.eq(bo.getUid() != null, KnowledgeInfo::getUid, bo.getUid());
lqw.like(StringUtils.isNotBlank(bo.getKname()), KnowledgeInfo::getKname, bo.getKname());
lqw.eq(bo.getShare() != null, KnowledgeInfo::getShare, bo.getShare());
lqw.eq(StringUtils.isNotBlank(bo.getDescription()), KnowledgeInfo::getDescription, bo.getDescription());
lqw.eq(StringUtils.isNotBlank(bo.getKnowledgeSeparator()), KnowledgeInfo::getKnowledgeSeparator, bo.getKnowledgeSeparator());
lqw.eq(StringUtils.isNotBlank(bo.getQuestionSeparator()), KnowledgeInfo::getQuestionSeparator, bo.getQuestionSeparator());
lqw.eq(bo.getOverlapChar() != null, KnowledgeInfo::getOverlapChar, bo.getOverlapChar());
lqw.eq(bo.getRetrieveLimit() != null, KnowledgeInfo::getRetrieveLimit, bo.getRetrieveLimit());
lqw.eq(bo.getTextBlockSize() != null, KnowledgeInfo::getTextBlockSize, bo.getTextBlockSize());
return lqw;
}
/**
* 新增知识库
*/
@Override
public Boolean insertByBo(KnowledgeInfoBo bo) {
KnowledgeInfo add = MapstructUtils.convert(bo, KnowledgeInfo.class);
validEntityBeforeSave(add);
boolean flag = baseMapper.insert(add) > 0;
if (flag) {
bo.setId(add.getId());
SysOssVo uploadDto = null;
String fileName = file.getOriginalFilename();
List<String> chunkList = new ArrayList<>();
KnowledgeAttach knowledgeAttach = new KnowledgeAttach();
knowledgeAttach.setKid(kid);
String docId = RandomUtil.randomString(10);
knowledgeAttach.setDocId(docId);
knowledgeAttach.setDocName(fileName);
knowledgeAttach.setDocType(fileName.substring(fileName.lastIndexOf(".") + 1));
String content = "";
ResourceLoader resourceLoader = resourceLoaderFactory.getLoaderByFileType(
knowledgeAttach.getDocType());
List<String> fids = new ArrayList<>();
try {
content = resourceLoader.getContent(file.getInputStream());
chunkList = resourceLoader.getChunkList(content, kid);
List<KnowledgeFragment> knowledgeFragmentList = new ArrayList<>();
if (CollUtil.isNotEmpty(chunkList)) {
// Upload file to OSS
uploadDto = ossService.upload(file);
for (int i = 0; i < chunkList.size(); i++) {
String fid = RandomUtil.randomString(10);
fids.add(fid);
KnowledgeFragment knowledgeFragment = new KnowledgeFragment();
knowledgeFragment.setKid(kid);
knowledgeFragment.setDocId(docId);
knowledgeFragment.setFid(fid);
knowledgeFragment.setIdx(i);
knowledgeFragment.setContent(chunkList.get(i));
knowledgeFragment.setCreateTime(new Date());
knowledgeFragmentList.add(knowledgeFragment);
}
return flag;
}
fragmentMapper.insertBatch(knowledgeFragmentList);
} catch (IOException e) {
log.error("保存知识库信息失败!{}", e.getMessage());
}
/**
* 修改知识库
*/
@Override
public Boolean updateByBo(KnowledgeInfoBo bo) {
KnowledgeInfo update = MapstructUtils.convert(bo, KnowledgeInfo.class);
validEntityBeforeSave(update);
return baseMapper.updateById(update) > 0;
knowledgeAttach.setContent(content);
knowledgeAttach.setCreateTime(new Date());
if (ObjectUtil.isNotEmpty(uploadDto) && ObjectUtil.isNotEmpty(uploadDto.getOssId())) {
knowledgeAttach.setOssId(uploadDto.getOssId());
//只有pdf文件 才需要拆解图片和分析图片内容
if (FileType.PDF.equals(knowledgeAttach.getDocType())) {
knowledgeAttach.setPicStatus(DealStatus.STATUS_10);
knowledgeAttach.setPicAnysStatus(DealStatus.STATUS_10);
} else {
knowledgeAttach.setPicStatus(DealStatus.STATUS_30);
knowledgeAttach.setPicAnysStatus(DealStatus.STATUS_30);
}
//所有文件上传后,都需要同步到向量数据库
knowledgeAttach.setVectorStatus(DealStatus.STATUS_10);
}
attachMapper.insert(knowledgeAttach);
/**
* 保存前的数据校验
*/
private void validEntityBeforeSave(KnowledgeInfo entity){
//TODO 做一些数据校验,如唯一约束
}
/**
* 检查用户是否有删除知识库权限
*
* @param knowledgeInfoList 知识库列表
*/
public void check(List<KnowledgeInfoVo> knowledgeInfoList) {
LoginUser loginUser = LoginHelper.getLoginUser();
for (KnowledgeInfoVo knowledgeInfoVo : knowledgeInfoList) {
if (!knowledgeInfoVo.getUid().equals(loginUser.getUserId())) {
throw new SecurityException("权限不足");
}
}
}
/**
* 批量删除知识库
*/
@Override
public Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid) {
if(isValid){
//TODO 做一些业务上的校验,判断是否需要校验
}
return baseMapper.deleteBatchIds(ids) > 0;
/**
* 第一步 定时 拆解PDF文件中的图片
*/
@Scheduled(fixedDelay = 15000) // 每3秒执行一次
public void dealKnowledgeAttachPic() throws Exception {
//处理 拆解PDF文件中的图片的记录
List<KnowledgeAttach> knowledgeAttaches = attachMapper.selectList(
new LambdaQueryWrapper<KnowledgeAttach>()
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10)
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
);
log.info("===============拆解PDF文件中的图片 size = {}", knowledgeAttaches.size());
if (ObjectUtil.isNotEmpty(knowledgeAttaches)) {
for (KnowledgeAttach attachItem : knowledgeAttaches) {
dealFileService.dealPicStatus(attachItem);
}
}
}
@Override
@Transactional(rollbackFor = Exception.class)
public void saveOne(KnowledgeInfoBo bo) {
KnowledgeInfo knowledgeInfo = MapstructUtils.convert(bo, KnowledgeInfo.class);
if (StringUtils.isBlank(bo.getKid())){
String kid = RandomUtil.randomString(10);
if (knowledgeInfo != null) {
knowledgeInfo.setKid(kid);
knowledgeInfo.setUid(LoginHelper.getLoginUser().getUserId());
}
baseMapper.insert(knowledgeInfo);
if (knowledgeInfo != null) {
vectorStoreService.createSchema(String.valueOf(knowledgeInfo.getId()),bo.getVectorModelName());
}
}else {
baseMapper.updateById(knowledgeInfo);
}
/**
* 第二步 定时 解析图片内容
*/
@Scheduled(fixedDelay = 15000)
public void dealKnowledgeAttachPicAnys() throws Exception {
//获取未处理的图片记录
List<KnowledgeAttachPic> knowledgeAttachPics = picMapper.selectList(
new LambdaQueryWrapper<KnowledgeAttachPic>()
.eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10)
);
if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) {
for (KnowledgeAttachPic picItem : knowledgeAttachPics) {
dealFileService.dealPicAnysStatus(picItem);
}
}
}
@Override
@Transactional(rollbackFor = Exception.class)
public void removeKnowledge(String id) {
Map<String,Object> map = new HashMap<>();
map.put("kid",id);
List<KnowledgeInfoVo> knowledgeInfoList = baseMapper.selectVoByMap(map);
check(knowledgeInfoList);
// 删除向量库信息
knowledgeInfoList.forEach(knowledgeInfoVo -> {
vectorStoreService.removeByKid(String.valueOf(knowledgeInfoVo.getId()),knowledgeInfoVo.getVectorModelName());
});
// 删除附件和知识片段
fragmentMapper.deleteByMap(map);
attachMapper.deleteByMap(map);
// 删除知识库
baseMapper.deleteByMap(map);
}
@Override
public void upload(KnowledgeInfoUploadBo bo) {
storeContent(bo.getFile(), bo.getKid());
}
public void storeContent(MultipartFile file, String kid) {
String fileName = file.getOriginalFilename();
List<String> chunkList = new ArrayList<>();
KnowledgeAttach knowledgeAttach = new KnowledgeAttach();
knowledgeAttach.setKid(kid);
String docId = RandomUtil.randomString(10);
knowledgeAttach.setDocId(docId);
knowledgeAttach.setDocName(fileName);
knowledgeAttach.setDocType(fileName.substring(fileName.lastIndexOf(".")+1));
String content = "";
ResourceLoader resourceLoader = resourceLoaderFactory.getLoaderByFileType(knowledgeAttach.getDocType());
List<String> fids = new ArrayList<>();
try {
content = resourceLoader.getContent(file.getInputStream());
chunkList = resourceLoader.getChunkList(content, kid);
List<KnowledgeFragment> knowledgeFragmentList = new ArrayList<>();
if (CollUtil.isNotEmpty(chunkList)) {
for (int i = 0; i < chunkList.size(); i++) {
String fid = RandomUtil.randomString(10);
fids.add(fid);
KnowledgeFragment knowledgeFragment = new KnowledgeFragment();
knowledgeFragment.setKid(kid);
knowledgeFragment.setDocId(docId);
knowledgeFragment.setFid(fid);
knowledgeFragment.setIdx(i);
knowledgeFragment.setContent(chunkList.get(i));
knowledgeFragment.setCreateTime(new Date());
knowledgeFragmentList.add(knowledgeFragment);
}
}
fragmentMapper.insertBatch(knowledgeFragmentList);
} catch (IOException e) {
log.error("保存知识库信息失败!{}", e.getMessage());
}
knowledgeAttach.setContent(content);
knowledgeAttach.setCreateTime(new Date());
attachMapper.insert(knowledgeAttach);
// 通过kid查询知识库信息
KnowledgeInfoVo knowledgeInfoVo = baseMapper.selectVoOne(Wrappers.<KnowledgeInfo>lambdaQuery()
.eq(KnowledgeInfo::getId, kid));
// 通过向量模型查询模型信息
ChatModelVo chatModelVo = chatModelService.selectModelByName(knowledgeInfoVo.getEmbeddingModelName());
StoreEmbeddingBo storeEmbeddingBo = new StoreEmbeddingBo();
storeEmbeddingBo.setKid(kid);
storeEmbeddingBo.setDocId(docId);
storeEmbeddingBo.setFids(fids);
storeEmbeddingBo.setChunkList(chunkList);
storeEmbeddingBo.setVectorModelName(knowledgeInfoVo.getVectorModelName());
storeEmbeddingBo.setEmbeddingModelName(knowledgeInfoVo.getEmbeddingModelName());
storeEmbeddingBo.setApiKey(chatModelVo.getApiKey());
storeEmbeddingBo.setBaseUrl(chatModelVo.getApiHost());
vectorStoreService.storeEmbeddings(storeEmbeddingBo);
/**
* 第三步 定时 处理 附件上传后上传向量数据库
*/
@Scheduled(fixedDelay = 30000) // 每3秒执行一次
public void dealKnowledgeAttachVector() throws Exception {
//处理 需要上传向量数据库的记录
List<KnowledgeAttach> knowledgeAttaches = attachMapper.selectList(
new LambdaQueryWrapper<KnowledgeAttach>()
.eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
.eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
);
log.info("===============上传向量数据库 size = {}", knowledgeAttaches.size());
if (ObjectUtil.isNotEmpty(knowledgeAttaches)) {
for (KnowledgeAttach attachItem : knowledgeAttaches) {
dealFileService.dealVectorStatus(attachItem);
}
}
}
/**
* 检查用户是否有删除知识库权限
*
* @param knowledgeInfoList 知识库列表
*/
public void check(List<KnowledgeInfoVo> knowledgeInfoList){
LoginUser loginUser = LoginHelper.getLoginUser();
for (KnowledgeInfoVo knowledgeInfoVo : knowledgeInfoList) {
if(!knowledgeInfoVo.getUid().equals(loginUser.getUserId())){
throw new SecurityException("权限不足");
}
}
}
}

View File

@@ -1,6 +1,10 @@
ALTER TABLE `knowledge_attach`
ADD COLUMN `oss_id` bigint(20) NOT NULL COMMENT '对象存储主键' AFTER `remark`,
ADD COLUMN `pic_status` tinyint(1) NOT NULL DEFAULT 10 COMMENT '拆解图片状态10未开始20进行中30已完成' AFTER `oss_id`,
ADD COLUMN `pic_anys_status` tinyint(1) NOT NULL DEFAULT 10 COMMENT '分析图片状态10未开始20进行中30已完成' AFTER `pic_status`,
ADD COLUMN `vector_status` tinyint(1) NOT NULL DEFAULT 10 COMMENT '写入向量数据库状态10未开始20进行中30已完成' AFTER `pic_anys_status`,
DROP PRIMARY KEY,
ADD PRIMARY KEY (`id`) USING BTREE;
ALTER TABLE `knowledge_attach`
MODIFY COLUMN `remark` text CHARACTER SET utf8mb4 COLLATE utf8mb4_general_ci NULL COMMENT '备注' AFTER `update_time`;