feat:pdf文件解析图片和分析图片,上传向量数据库都修改成异步处理

This commit is contained in:
zhouweiyi
2025-05-15 17:46:38 +08:00
parent 584212c569
commit c6ffbcb3cf
24 changed files with 1585 additions and 414 deletions

View File

@@ -11,6 +11,8 @@ public class DealStatus {
public static final Integer STATUS_20 = 20;
// Finished
public static final Integer STATUS_30 = 30;
// Processing failed
public static final Integer STATUS_40 = 40;
}

View File

@@ -0,0 +1,81 @@
package org.ruoyi.domain;
import com.baomidou.mybatisplus.annotation.*;
import lombok.Data;
import lombok.EqualsAndHashCode;
import java.io.Serial;
import org.ruoyi.core.domain.BaseEntity;
/**
 * Entity for a picture that belongs to a knowledge-base attachment.
 * Mapped to table knowledge_attach_pic.
 *
 * @author Albert
 * @date 2025-05-15
 */
@Data
@EqualsAndHashCode(callSuper = true)
@TableName("knowledge_attach_pic")
public class KnowledgeAttachPic extends BaseEntity {
@Serial
private static final long serialVersionUID = 1L;
/**
 * Primary key
 */
@TableId(value = "id")
private Long id;
/**
 * Knowledge base id
 */
private String kid;
/**
 * Attachment id
 */
private String aid;
/**
 * Document name
 */
private String docName;
/**
 * Document type
 */
private String docType;
/**
 * Document content
 */
private String content;
/**
 * Page number the picture is on
 */
private Integer pageNum;
/**
 * Index of the picture within its page
 */
private Integer indexNum;
/**
 * Picture analysis status: 10 = not started, 20 = in progress, 30 = finished
 */
private Integer picAnysStatus;
/**
 * Object storage (OSS) primary key
 */
private Long ossId;
/**
 * Remark
 */
private String remark;
}

View File

@@ -0,0 +1,90 @@
package org.ruoyi.domain.bo;
import org.ruoyi.common.core.validate.AddGroup;
import org.ruoyi.common.core.validate.EditGroup;
import org.ruoyi.core.domain.BaseEntity;
import io.github.linpeilie.annotations.AutoMapper;
import lombok.Data;
import lombok.EqualsAndHashCode;
import jakarta.validation.constraints.*;
import org.ruoyi.domain.KnowledgeAttachPic;
/**
 * Business object for knowledge-base attachment pictures (table knowledge_attach_pic).
 * Carries the bean-validation constraints applied for the add and edit groups and is
 * auto-mapped to {@link KnowledgeAttachPic}.
 *
 * @author Albert
 * @date 2025-05-15
 */
@Data
@EqualsAndHashCode(callSuper = true)
@AutoMapper(target = KnowledgeAttachPic.class, reverseConvertGenerate = false)
public class KnowledgeAttachPicBo extends BaseEntity {
/**
 * Primary key (required only when editing)
 */
@NotNull(message = "主键不能为空", groups = {EditGroup.class})
private Long id;
/**
 * Knowledge base id
 */
@NotBlank(message = "知识库id不能为空", groups = {AddGroup.class, EditGroup.class})
private String kid;
/**
 * Attachment id
 */
@NotBlank(message = "附件id不能为空", groups = {AddGroup.class, EditGroup.class})
private String aid;
/**
 * Document name
 */
@NotBlank(message = "文档名称不能为空", groups = {AddGroup.class, EditGroup.class})
private String docName;
/**
 * Document type
 */
@NotBlank(message = "文档类型不能为空", groups = {AddGroup.class, EditGroup.class})
private String docType;
/**
 * Document content
 */
@NotBlank(message = "文档内容不能为空", groups = {AddGroup.class, EditGroup.class})
private String content;
/**
 * Page number the picture is on
 */
@NotNull(message = "所在页数不能为空", groups = {AddGroup.class, EditGroup.class})
private Integer pageNum;
/**
 * Index of the picture within its page
 */
@NotNull(message = "所在页index不能为空", groups = {AddGroup.class, EditGroup.class})
private Integer indexNum;
/**
 * Picture analysis status: 10 = not started, 20 = in progress, 30 = finished
 */
@NotNull(message = "分析图片状态10未开始20进行中30已完成不能为空", groups = {AddGroup.class,
EditGroup.class})
private Integer picAnysStatus;
/**
 * Object storage (OSS) primary key
 */
@NotNull(message = "对象存储主键不能为空", groups = {AddGroup.class, EditGroup.class})
private Long ossId;
/**
 * Remark
 */
@NotBlank(message = "备注不能为空", groups = {AddGroup.class, EditGroup.class})
private String remark;
}

View File

@@ -0,0 +1,92 @@
package org.ruoyi.domain.vo;
import com.alibaba.excel.annotation.ExcelIgnoreUnannotated;
import com.alibaba.excel.annotation.ExcelProperty;
import io.github.linpeilie.annotations.AutoMapper;
import lombok.Data;
import java.io.Serial;
import java.io.Serializable;
import org.ruoyi.domain.KnowledgeAttachPic;
/**
 * View object for knowledge-base attachment pictures (table knowledge_attach_pic).
 * Only fields annotated with {@code @ExcelProperty} take part in Excel export
 * because the class is marked {@code @ExcelIgnoreUnannotated}.
 *
 * @author Albert
 * @date 2025-05-15
 */
@Data
@ExcelIgnoreUnannotated
@AutoMapper(target = KnowledgeAttachPic.class)
public class KnowledgeAttachPicVo implements Serializable {
@Serial
private static final long serialVersionUID = 1L;
/**
 * Primary key
 */
@ExcelProperty(value = "主键")
private Long id;
/**
 * Knowledge base id
 */
@ExcelProperty(value = "知识库id")
private String kid;
/**
 * Attachment id
 */
@ExcelProperty(value = "附件id")
private String aid;
/**
 * Document name
 */
@ExcelProperty(value = "文档名称")
private String docName;
/**
 * Document type
 */
@ExcelProperty(value = "文档类型")
private String docType;
/**
 * Document content
 */
@ExcelProperty(value = "文档内容")
private String content;
/**
 * Page number the picture is on
 */
@ExcelProperty(value = "所在页数")
private Integer pageNum;
/**
 * Index of the picture within its page
 */
@ExcelProperty(value = "所在页index")
private Integer indexNum;
/**
 * Picture analysis status: 10 = not started, 20 = in progress, 30 = finished
 */
@ExcelProperty(value = "分析图片状态10未开始20进行中30已完成")
private Integer picAnysStatus;
/**
 * Object storage (OSS) primary key
 */
@ExcelProperty(value = "对象存储主键")
private Long ossId;
/**
 * Remark
 */
@ExcelProperty(value = "备注")
private String remark;
}

View File

@@ -0,0 +1,15 @@
package org.ruoyi.mapper;
import org.ruoyi.core.mapper.BaseMapperPlus;
import org.ruoyi.domain.KnowledgeAttachPic;
import org.ruoyi.domain.vo.KnowledgeAttachPicVo;
/**
 * Mapper interface for knowledge-base attachment pictures.
 * Declares no methods of its own; everything comes from {@code BaseMapperPlus},
 * parameterised with the {@link KnowledgeAttachPic} entity and the
 * {@link KnowledgeAttachPicVo} view object.
 *
 * @author Albert
 * @date 2025-05-15
 */
public interface KnowledgeAttachPicMapper extends BaseMapperPlus<KnowledgeAttachPic, KnowledgeAttachPicVo> {
}

View File

@@ -0,0 +1,47 @@
package org.ruoyi.service;
import java.util.Collection;
import java.util.List;
import org.ruoyi.core.page.PageQuery;
import org.ruoyi.core.page.TableDataInfo;
import org.ruoyi.domain.bo.KnowledgeAttachPicBo;
import org.ruoyi.domain.vo.KnowledgeAttachPicVo;
/**
 * Service interface for knowledge-base attachment pictures.
 *
 * @author Albert
 * @date 2025-05-15
 */
public interface IKnowledgeAttachPicService {
/**
 * Queries a single knowledge-base attachment picture.
 *
 * @param id id of the record to look up
 * @return the matching view object
 */
KnowledgeAttachPicVo queryById(Long id);
/**
 * Queries a page of knowledge-base attachment pictures.
 *
 * @param bo        query criteria
 * @param pageQuery paging parameters
 * @return the requested page of view objects
 */
TableDataInfo<KnowledgeAttachPicVo> queryPageList(KnowledgeAttachPicBo bo, PageQuery pageQuery);
/**
 * Queries the list of knowledge-base attachment pictures (no paging).
 *
 * @param bo query criteria
 * @return the matching view objects
 */
List<KnowledgeAttachPicVo> queryList(KnowledgeAttachPicBo bo);
/**
 * Adds a knowledge-base attachment picture.
 *
 * @param bo data of the record to add
 * @return success flag
 */
Boolean insertByBo(KnowledgeAttachPicBo bo);
/**
 * Updates a knowledge-base attachment picture.
 *
 * @param bo data of the record to update
 * @return success flag
 */
Boolean updateByBo(KnowledgeAttachPicBo bo);
/**
 * Validates and batch-deletes knowledge-base attachment pictures.
 *
 * @param ids     ids of the records to delete
 * @param isValid presumably whether business validation should run before deleting
 *                (TODO confirm against the implementation)
 * @return success flag
 */
Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid);
}

View File

@@ -0,0 +1,123 @@
package org.ruoyi.service.impl;
import org.ruoyi.common.core.utils.MapstructUtils;
import org.ruoyi.common.core.utils.StringUtils;
import org.ruoyi.core.page.TableDataInfo;
import org.ruoyi.core.page.PageQuery;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import lombok.RequiredArgsConstructor;
import org.ruoyi.domain.KnowledgeAttachPic;
import org.ruoyi.domain.bo.KnowledgeAttachPicBo;
import org.ruoyi.domain.vo.KnowledgeAttachPicVo;
import org.ruoyi.mapper.KnowledgeAttachPicMapper;
import org.ruoyi.service.IKnowledgeAttachPicService;
import org.springframework.stereotype.Service;
import java.util.List;
import java.util.Map;
import java.util.Collection;
/**
 * Service implementation for knowledge-base attachment pictures.
 * Every operation delegates to {@link KnowledgeAttachPicMapper}.
 *
 * @author ageerle
 * @date 2025-05-15
 */
@RequiredArgsConstructor
@Service
public class KnowledgeAttachPicServiceImpl implements IKnowledgeAttachPicService {

    private final KnowledgeAttachPicMapper baseMapper;

    /**
     * Queries a single knowledge-base attachment picture.
     *
     * @param id id of the record to look up
     * @return whatever {@code selectVoById} returns for that id
     */
    @Override
    public KnowledgeAttachPicVo queryById(Long id) {
        return baseMapper.selectVoById(id);
    }

    /**
     * Queries a page of knowledge-base attachment pictures.
     *
     * @param bo        query criteria (see {@link #buildQueryWrapper})
     * @param pageQuery paging parameters
     * @return the requested page wrapped in a {@link TableDataInfo}
     */
    @Override
    public TableDataInfo<KnowledgeAttachPicVo> queryPageList(KnowledgeAttachPicBo bo,
                                                             PageQuery pageQuery) {
        LambdaQueryWrapper<KnowledgeAttachPic> lqw = buildQueryWrapper(bo);
        Page<KnowledgeAttachPicVo> result = baseMapper.selectVoPage(pageQuery.build(), lqw);
        return TableDataInfo.build(result);
    }

    /**
     * Queries the list of knowledge-base attachment pictures (no paging).
     *
     * @param bo query criteria (see {@link #buildQueryWrapper})
     * @return the matching view objects
     */
    @Override
    public List<KnowledgeAttachPicVo> queryList(KnowledgeAttachPicBo bo) {
        LambdaQueryWrapper<KnowledgeAttachPic> lqw = buildQueryWrapper(bo);
        return baseMapper.selectVoList(lqw);
    }

    /**
     * Builds the query wrapper from the criteria object. A condition is added only when
     * the corresponding field is non-blank (strings) or non-null (numbers); the document
     * name is matched with LIKE, every other field with equality.
     */
    private LambdaQueryWrapper<KnowledgeAttachPic> buildQueryWrapper(KnowledgeAttachPicBo bo) {
        LambdaQueryWrapper<KnowledgeAttachPic> lqw = Wrappers.lambdaQuery();
        lqw.eq(StringUtils.isNotBlank(bo.getKid()), KnowledgeAttachPic::getKid, bo.getKid());
        lqw.eq(StringUtils.isNotBlank(bo.getAid()), KnowledgeAttachPic::getAid, bo.getAid());
        lqw.like(StringUtils.isNotBlank(bo.getDocName()), KnowledgeAttachPic::getDocName,
            bo.getDocName());
        lqw.eq(StringUtils.isNotBlank(bo.getDocType()), KnowledgeAttachPic::getDocType,
            bo.getDocType());
        lqw.eq(StringUtils.isNotBlank(bo.getContent()), KnowledgeAttachPic::getContent,
            bo.getContent());
        lqw.eq(bo.getPageNum() != null, KnowledgeAttachPic::getPageNum, bo.getPageNum());
        lqw.eq(bo.getIndexNum() != null, KnowledgeAttachPic::getIndexNum, bo.getIndexNum());
        lqw.eq(bo.getPicAnysStatus() != null, KnowledgeAttachPic::getPicAnysStatus,
            bo.getPicAnysStatus());
        lqw.eq(bo.getOssId() != null, KnowledgeAttachPic::getOssId, bo.getOssId());
        return lqw;
    }

    /**
     * Adds a knowledge-base attachment picture. On success the generated id is written
     * back into {@code bo}.
     *
     * @param bo data of the record to add
     * @return {@code true} when the insert affected at least one row
     */
    @Override
    public Boolean insertByBo(KnowledgeAttachPicBo bo) {
        KnowledgeAttachPic add = MapstructUtils.convert(bo, KnowledgeAttachPic.class);
        validEntityBeforeSave(add);
        boolean flag = baseMapper.insert(add) > 0;
        if (flag) {
            bo.setId(add.getId());
        }
        return flag;
    }

    /**
     * Updates a knowledge-base attachment picture by id.
     *
     * @param bo data of the record to update
     * @return {@code true} when the update affected at least one row
     */
    @Override
    public Boolean updateByBo(KnowledgeAttachPicBo bo) {
        KnowledgeAttachPic update = MapstructUtils.convert(bo, KnowledgeAttachPic.class);
        validEntityBeforeSave(update);
        return baseMapper.updateById(update) > 0;
    }

    /**
     * Validation hook executed before an entity is saved. Currently a no-op.
     */
    private void validEntityBeforeSave(KnowledgeAttachPic entity) {
        // TODO add data validation, e.g. unique constraints
    }

    /**
     * Batch-deletes knowledge-base attachment pictures.
     *
     * @param ids     ids of the records to delete; {@code null} or empty deletes nothing
     * @param isValid whether business validation should run first; {@code null} is
     *                treated as {@code false}
     * @return {@code true} when at least one row was deleted
     */
    @Override
    public Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid) {
        // Nothing to delete: return early instead of handing an empty id list to the mapper.
        if (ids == null || ids.isEmpty()) {
            return false;
        }
        // Boolean.TRUE.equals avoids the NullPointerException that unboxing a null flag would cause.
        if (Boolean.TRUE.equals(isValid)) {
            // TODO add business validation that decides whether deletion is allowed
        }
        return baseMapper.deleteBatchIds(ids) > 0;
    }
}

View File

@@ -1,5 +1,7 @@
package org.ruoyi.service.impl;
import cn.hutool.core.util.ObjectUtil;
import java.util.stream.Collectors;
import org.ruoyi.common.core.utils.MapstructUtils;
import org.ruoyi.common.core.utils.StringUtils;
import org.ruoyi.core.page.TableDataInfo;
@@ -8,8 +10,11 @@ import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import lombok.RequiredArgsConstructor;
import org.ruoyi.domain.KnowledgeAttachPic;
import org.ruoyi.domain.vo.KnowledgeAttachVo;
import org.ruoyi.mapper.KnowledgeAttachPicMapper;
import org.ruoyi.mapper.KnowledgeFragmentMapper;
import org.ruoyi.system.service.ISysOssService;
import org.springframework.stereotype.Service;
import org.ruoyi.domain.bo.KnowledgeAttachBo;
@@ -33,99 +38,130 @@ import java.util.Collection;
@Service
public class KnowledgeAttachServiceImpl implements IKnowledgeAttachService {
private final KnowledgeAttachMapper baseMapper;
private final KnowledgeFragmentMapper fragmentMapper;
private final KnowledgeAttachMapper baseMapper;
private final KnowledgeFragmentMapper fragmentMapper;
/**
* 查询知识库附件
*/
@Override
public KnowledgeAttachVo queryById(Long id){
return baseMapper.selectVoById(id);
private final ISysOssService ossService;
private final KnowledgeAttachPicMapper picMapper;
/**
* 查询知识库附件
*/
@Override
public KnowledgeAttachVo queryById(Long id) {
return baseMapper.selectVoById(id);
}
/**
* 查询知识库附件列表
*/
@Override
public TableDataInfo<KnowledgeAttachVo> queryPageList(KnowledgeAttachBo bo, PageQuery pageQuery) {
LambdaQueryWrapper<KnowledgeAttach> lqw = buildQueryWrapper(bo);
Page<KnowledgeAttachVo> result = baseMapper.selectVoPage(pageQuery.build(), lqw);
return TableDataInfo.build(result);
}
/**
* 查询知识库附件列表
*/
@Override
public List<KnowledgeAttachVo> queryList(KnowledgeAttachBo bo) {
LambdaQueryWrapper<KnowledgeAttach> lqw = buildQueryWrapper(bo);
return baseMapper.selectVoList(lqw);
}
private LambdaQueryWrapper<KnowledgeAttach> buildQueryWrapper(KnowledgeAttachBo bo) {
Map<String, Object> params = bo.getParams();
LambdaQueryWrapper<KnowledgeAttach> lqw = Wrappers.lambdaQuery();
lqw.eq(StringUtils.isNotBlank(bo.getKid()), KnowledgeAttach::getKid, bo.getKid());
lqw.eq(StringUtils.isNotBlank(bo.getDocId()), KnowledgeAttach::getDocId, bo.getDocId());
lqw.like(StringUtils.isNotBlank(bo.getDocName()), KnowledgeAttach::getDocName, bo.getDocName());
lqw.eq(StringUtils.isNotBlank(bo.getDocType()), KnowledgeAttach::getDocType, bo.getDocType());
lqw.eq(StringUtils.isNotBlank(bo.getContent()), KnowledgeAttach::getContent, bo.getContent());
return lqw;
}
/**
* 新增知识库附件
*/
@Override
public Boolean insertByBo(KnowledgeAttachBo bo) {
KnowledgeAttach add = MapstructUtils.convert(bo, KnowledgeAttach.class);
validEntityBeforeSave(add);
boolean flag = baseMapper.insert(add) > 0;
if (flag) {
bo.setId(add.getId());
}
return flag;
}
/**
* 查询知识库附件列表
*/
@Override
public TableDataInfo<KnowledgeAttachVo> queryPageList(KnowledgeAttachBo bo, PageQuery pageQuery) {
LambdaQueryWrapper<KnowledgeAttach> lqw = buildQueryWrapper(bo);
Page<KnowledgeAttachVo> result = baseMapper.selectVoPage(pageQuery.build(), lqw);
return TableDataInfo.build(result);
/**
* 修改知识库附件
*/
@Override
public Boolean updateByBo(KnowledgeAttachBo bo) {
KnowledgeAttach update = MapstructUtils.convert(bo, KnowledgeAttach.class);
validEntityBeforeSave(update);
return baseMapper.updateById(update) > 0;
}
/**
* 保存前的数据校验
*/
private void validEntityBeforeSave(KnowledgeAttach entity) {
//TODO 做一些数据校验,如唯一约束
}
/**
* 批量删除知识库附件
*/
@Override
public Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid) {
if (isValid) {
//TODO 做一些业务上的校验,判断是否需要校验
}
return baseMapper.deleteBatchIds(ids) > 0;
}
/**
* 查询知识库附件列表
*/
@Override
public List<KnowledgeAttachVo> queryList(KnowledgeAttachBo bo) {
LambdaQueryWrapper<KnowledgeAttach> lqw = buildQueryWrapper(bo);
return baseMapper.selectVoList(lqw);
/**
 * Removes everything stored for a document: the OSS objects of its attachments, the
 * extracted pictures (rows and OSS objects), the attachment rows and the fragment rows.
 *
 * @param docId document id; a {@code null} or blank value is ignored
 */
@Override
public void removeKnowledgeAttach(String docId) {
    // NOTE(review): a null value in the by-map condition is presumably rendered as
    // "doc_id IS NULL", which would delete unrelated rows - confirm and keep this guard.
    if (docId == null || docId.isBlank()) {
        return;
    }
    Map<String, Object> condition = new HashMap<>();
    condition.put("doc_id", docId);
    List<KnowledgeAttachVo> attachments = baseMapper.selectVoByMap(condition);
    if (ObjectUtil.isNotEmpty(attachments)) {
        // Delete the attachments' own OSS objects; skip attachments that have none.
        Collection<Long> attachOssIds = attachments.stream()
            .map(KnowledgeAttachVo::getOssId)
            .filter(ossId -> ossId != null)
            .collect(Collectors.toList());
        if (!attachOssIds.isEmpty()) {
            ossService.deleteWithValidByIds(attachOssIds, false);
        }
        // Look up the pictures extracted from these attachments.
        // NOTE(review): aid is a String column while the attachment id is numeric here;
        // verify the database compares them as intended.
        List<KnowledgeAttachPic> pictures = picMapper.selectList(
            new LambdaQueryWrapper<KnowledgeAttachPic>()
                .in(KnowledgeAttachPic::getKid,
                    attachments.stream().map(KnowledgeAttachVo::getKid)
                        .collect(Collectors.toList()))
                .in(KnowledgeAttachPic::getAid,
                    attachments.stream().map(KnowledgeAttachVo::getId)
                        .collect(Collectors.toList()))
        );
        if (ObjectUtil.isNotEmpty(pictures)) {
            // Delete the pictures' OSS objects, again ignoring pictures without one.
            Collection<Long> pictureOssIds = pictures.stream()
                .map(KnowledgeAttachPic::getOssId)
                .filter(ossId -> ossId != null)
                .collect(Collectors.toList());
            if (!pictureOssIds.isEmpty()) {
                ossService.deleteWithValidByIds(pictureOssIds, false);
            }
            List<Long> pictureIds = pictures.stream()
                .map(KnowledgeAttachPic::getId)
                .collect(Collectors.toList());
            picMapper.deleteByIds(pictureIds);
        }
    }
    baseMapper.deleteByMap(condition);
    fragmentMapper.deleteByMap(condition);
}
private LambdaQueryWrapper<KnowledgeAttach> buildQueryWrapper(KnowledgeAttachBo bo) {
Map<String, Object> params = bo.getParams();
LambdaQueryWrapper<KnowledgeAttach> lqw = Wrappers.lambdaQuery();
lqw.eq(StringUtils.isNotBlank(bo.getKid()), KnowledgeAttach::getKid, bo.getKid());
lqw.eq(StringUtils.isNotBlank(bo.getDocId()), KnowledgeAttach::getDocId, bo.getDocId());
lqw.like(StringUtils.isNotBlank(bo.getDocName()), KnowledgeAttach::getDocName, bo.getDocName());
lqw.eq(StringUtils.isNotBlank(bo.getDocType()), KnowledgeAttach::getDocType, bo.getDocType());
lqw.eq(StringUtils.isNotBlank(bo.getContent()), KnowledgeAttach::getContent, bo.getContent());
return lqw;
}
/**
* 新增知识库附件
*/
@Override
public Boolean insertByBo(KnowledgeAttachBo bo) {
KnowledgeAttach add = MapstructUtils.convert(bo, KnowledgeAttach.class);
validEntityBeforeSave(add);
boolean flag = baseMapper.insert(add) > 0;
if (flag) {
bo.setId(add.getId());
}
return flag;
}
/**
* 修改知识库附件
*/
@Override
public Boolean updateByBo(KnowledgeAttachBo bo) {
KnowledgeAttach update = MapstructUtils.convert(bo, KnowledgeAttach.class);
validEntityBeforeSave(update);
return baseMapper.updateById(update) > 0;
}
/**
* 保存前的数据校验
*/
private void validEntityBeforeSave(KnowledgeAttach entity){
//TODO 做一些数据校验,如唯一约束
}
/**
* 批量删除知识库附件
*/
@Override
public Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid) {
if(isValid){
//TODO 做一些业务上的校验,判断是否需要校验
}
return baseMapper.deleteBatchIds(ids) > 0;
}
@Override
public void removeKnowledgeAttach(String docId) {
Map<String,Object> map = new HashMap<>();
map.put("doc_id",docId);
baseMapper.deleteByMap(map);
fragmentMapper.deleteByMap(map);
}
@Override
public String translationByFile(MultipartFile file, String targetLanguage) {
@Override
public String translationByFile(MultipartFile file, String targetLanguage) {
/*String fileName = file.getOriginalFilename();
String docType = fileName.substring(fileName.lastIndexOf(".")+1);
String content = "";
@@ -173,6 +209,6 @@ public class KnowledgeAttachServiceImpl implements IKnowledgeAttachService {
throw new BaseException("调用大模型失败,请检查密钥是否正确!");
}
return chatCompletionResponse.getChoices().get(0).getMessage().getContent().toString();*/
return "接口开发中!";
}
return "接口开发中!";
}
}

View File

@@ -4,6 +4,8 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.extern.slf4j.Slf4j;
import okhttp3.MediaType;
import okhttp3.MultipartBody;
@@ -23,16 +25,19 @@ import org.springframework.web.multipart.MultipartFile;
/**
* PDF图片提取服务实现类
*/
@Service
//@Service
@Slf4j
public class PdfImageExtractServiceImpl implements PdfImageExtractService {
@Data
@AllArgsConstructor
//public class PdfImageExtractServiceImpl implements PdfImageExtractService {
public class PdfImageExtractServiceImpl {
@Value("${pdf.extract.service.url}")
// @Value("${pdf.extract.service.url}")
private String serviceUrl;
@Value("${pdf.extract.ai-api.url}")
// @Value("${pdf.extract.ai-api.url}")
private String aiApiUrl;
@Value("${pdf.extract.ai-api.key}")
private String aiApiKey ;
// @Value("${pdf.extract.ai-api.key}")
private String aiApiKey;
private final OkHttpClient client = new Builder()
.connectTimeout(100, TimeUnit.SECONDS)
@@ -43,7 +48,7 @@ public class PdfImageExtractServiceImpl implements PdfImageExtractService {
private static final MediaType JSON = MediaType.parse("application/json; charset=utf-8");
@Override
// @Override
public byte[] extractImages(MultipartFile pdfFile, String imageFormat, boolean allowDuplicates)
throws IOException {
// 构建multipart请求
@@ -77,7 +82,7 @@ public class PdfImageExtractServiceImpl implements PdfImageExtractService {
* @return 文件内容结果列表
* @throws IOException 如果API调用过程中发生错误
*/
@Override
// @Override
public List<PdfFileContentResult> dealFileContent(String[] unzip) throws IOException {
List<PdfFileContentResult> results = new ArrayList<>();
int i = 0;
@@ -110,6 +115,7 @@ public class PdfImageExtractServiceImpl implements PdfImageExtractService {
// 执行请求
try {
log.info("=============call=" + ++i);
Response response = client.newCall(request).execute();
log.info("=============response=" + response);
if (!response.isSuccessful()) {
@@ -126,11 +132,10 @@ public class PdfImageExtractServiceImpl implements PdfImageExtractService {
throw new RuntimeException(e);
}
}
return results;
}
@Override
// @Override
public List<PdfFileContentResult> extractImages(MultipartFile file) throws IOException {
String format = "png";
boolean allowDuplicates = true;

View File

@@ -97,7 +97,7 @@ public class VectorStoreServiceImpl implements VectorStoreService {
for (int i = 0; i < chunkList.size(); i++) {
Map<String, Object> dataSchema = new HashMap<>();
dataSchema.put("kid", storeEmbeddingBo.getKid());
dataSchema.put("docId", storeEmbeddingBo.getKid());
dataSchema.put("docId", storeEmbeddingBo.getDocId());
dataSchema.put("fid", storeEmbeddingBo.getFids().get(i));
Embedding embedding = embeddingModel.embed(chunkList.get(i)).content();
TextSegment segment = TextSegment.from(chunkList.get(i));

View File

@@ -11,6 +11,8 @@ import java.util.Base64;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.web.multipart.MultipartFile;
/**
* ZIP文件处理工具类
@@ -92,4 +94,90 @@ public class ZipUtils {
}
return base64Contents.toArray(new String[0]);
}
/**
 * Unpacks a ZIP archive held in memory and wraps every non-directory entry in a
 * MultipartFile. The entry name is used both as the part name and as the original
 * file name; the content type is derived from the entry name.
 *
 * @param zipData bytes of the ZIP archive
 * @return one MultipartFile per file entry, in archive order
 * @throws IOException if reading the archive fails
 */
public static MultipartFile[] unzipToMultipartFiles(byte[] zipData) throws IOException {
    List<MultipartFile> extracted = new ArrayList<>();
    try (ByteArrayInputStream rawBytes = new ByteArrayInputStream(zipData);
         ZipInputStream archive = new ZipInputStream(rawBytes)) {
        for (ZipEntry entry = archive.getNextEntry(); entry != null; entry = archive.getNextEntry()) {
            if (entry.isDirectory()) {
                // Directories carry no payload; just move on to the next entry.
                archive.closeEntry();
                continue;
            }
            // Drain the current entry into memory.
            ByteArrayOutputStream entryBytes = new ByteArrayOutputStream();
            byte[] chunk = new byte[4096];
            for (int n = archive.read(chunk); n != -1; n = archive.read(chunk)) {
                entryBytes.write(chunk, 0, n);
            }
            String entryName = entry.getName();
            extracted.add(new MockMultipartFile(
                entryName,                       // part name
                entryName,                       // original file name
                determineContentType(entryName), // content type
                entryBytes.toByteArray()         // payload
            ));
            archive.closeEntry();
        }
    }
    return extracted.toArray(new MultipartFile[0]);
}
/**
 * Determines the MIME content type from a file name's extension.
 * The extension is the text after the last dot, compared case-insensitively and
 * independently of the JVM default locale. A name without a dot, or whose only dot
 * is the first character, has no extension.
 *
 * @param fileName file name (may contain a path)
 * @return the matching content type, or "application/octet-stream" when the
 *         extension is missing or unknown
 */
private static String determineContentType(String fileName) {
    String extension = "";
    int dot = fileName.lastIndexOf('.');
    if (dot > 0) {
        // Locale.ROOT: the default-locale variant turns "GIF" into "gıf" under a Turkish locale.
        extension = fileName.substring(dot + 1).toLowerCase(java.util.Locale.ROOT);
    }
    switch (extension) {
        case "txt":
            return "text/plain";
        case "html":
        case "htm":
            return "text/html";
        case "pdf":
            return "application/pdf";
        case "jpg":
        case "jpeg":
            return "image/jpeg";
        case "png":
            return "image/png";
        case "gif":
            return "image/gif";
        case "doc":
            return "application/msword";
        case "docx":
            // Office Open XML documents have their own type, distinct from legacy .doc.
            return "application/vnd.openxmlformats-officedocument.wordprocessingml.document";
        case "xls":
            return "application/vnd.ms-excel";
        case "xlsx":
            // Office Open XML spreadsheets have their own type, distinct from legacy .xls.
            return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet";
        case "xml":
            return "application/xml";
        case "json":
            return "application/json";
        default:
            return "application/octet-stream";
    }
}
}