mirror of
https://github.com/ccmjga/zhilu-admin
synced 2026-04-03 10:56:09 +00:00
add async
This commit is contained in:
@@ -2,7 +2,9 @@ package com.zl.mjga;
|
|||||||
|
|
||||||
import org.springframework.boot.SpringApplication;
|
import org.springframework.boot.SpringApplication;
|
||||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||||
|
import org.springframework.scheduling.annotation.EnableAsync;
|
||||||
|
|
||||||
|
@EnableAsync
|
||||||
@SpringBootApplication(scanBasePackages = {"com.zl.mjga", "org.jooq.generated"})
|
@SpringBootApplication(scanBasePackages = {"com.zl.mjga", "org.jooq.generated"})
|
||||||
public class ApplicationService {
|
public class ApplicationService {
|
||||||
|
|
||||||
|
|||||||
@@ -61,7 +61,9 @@ public class LibraryController {
|
|||||||
@RequestPart("libraryId") Long libraryId, @RequestPart("file") MultipartFile multipartFile)
|
@RequestPart("libraryId") Long libraryId, @RequestPart("file") MultipartFile multipartFile)
|
||||||
throws Exception {
|
throws Exception {
|
||||||
String objectName = uploadService.uploadLibraryDoc(multipartFile);
|
String objectName = uploadService.uploadLibraryDoc(multipartFile);
|
||||||
ragService.ingestDocumentBy(libraryId, objectName, multipartFile.getOriginalFilename());
|
Long libraryDocId =
|
||||||
|
ragService.createLibraryDocBy(libraryId, objectName, multipartFile.getOriginalFilename());
|
||||||
|
ragService.embeddingAndCreateDocSegment(libraryDocId, objectName);
|
||||||
return objectName;
|
return objectName;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -31,12 +31,14 @@ import lombok.extern.slf4j.Slf4j;
|
|||||||
import org.apache.commons.collections4.CollectionUtils;
|
import org.apache.commons.collections4.CollectionUtils;
|
||||||
import org.apache.commons.lang3.RandomStringUtils;
|
import org.apache.commons.lang3.RandomStringUtils;
|
||||||
import org.jooq.JSON;
|
import org.jooq.JSON;
|
||||||
|
import org.jooq.generated.mjga.enums.LibraryDocStatusEnum;
|
||||||
import org.jooq.generated.mjga.tables.daos.LibraryDocSegmentDao;
|
import org.jooq.generated.mjga.tables.daos.LibraryDocSegmentDao;
|
||||||
import org.jooq.generated.mjga.tables.pojos.LibraryDoc;
|
import org.jooq.generated.mjga.tables.pojos.LibraryDoc;
|
||||||
import org.jooq.generated.mjga.tables.pojos.LibraryDocSegment;
|
import org.jooq.generated.mjga.tables.pojos.LibraryDocSegment;
|
||||||
import org.springframework.context.annotation.Configuration;
|
import org.springframework.context.annotation.Configuration;
|
||||||
|
import org.springframework.scheduling.annotation.Async;
|
||||||
|
import org.springframework.security.core.context.SecurityContextHolder;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
import org.springframework.transaction.annotation.Transactional;
|
|
||||||
|
|
||||||
@Configuration
|
@Configuration
|
||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
@@ -79,62 +81,17 @@ public class RagService {
|
|||||||
libraryDocRepository.deleteById(docId);
|
libraryDocRepository.deleteById(docId);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Transactional(rollbackFor = Throwable.class)
|
public Long createLibraryDocBy(Long libraryId, String objectName, String originalName)
|
||||||
public void ingestDocumentBy(Long libraryId, String objectName, String originalName)
|
|
||||||
throws Exception {
|
|
||||||
Document document =
|
|
||||||
amazonS3DocumentLoader.loadDocument(
|
|
||||||
minIoConfig.getDefaultBucket(), objectName, new ApacheTikaDocumentParser());
|
|
||||||
ArrayList<String> embeddingIds = new ArrayList<>();
|
|
||||||
try {
|
|
||||||
Long libraryDocId = createLibraryDoc(objectName, originalName, document.metadata().toMap());
|
|
||||||
DocumentByParagraphSplitter documentByParagraphSplitter =
|
|
||||||
new DocumentByParagraphSplitter(1000, 200);
|
|
||||||
documentByParagraphSplitter
|
|
||||||
.split(document)
|
|
||||||
.forEach(
|
|
||||||
textSegment -> {
|
|
||||||
Metadata metadata = textSegment.metadata();
|
|
||||||
metadata.put("libraryId", libraryId);
|
|
||||||
Response<Embedding> embed = zhipuEmbeddingModel.embed(textSegment);
|
|
||||||
Integer tokenUsage = embed.tokenUsage().totalTokenCount();
|
|
||||||
Embedding vector = embed.content();
|
|
||||||
String embeddingId = zhiPuEmbeddingStore.add(vector, textSegment);
|
|
||||||
embeddingIds.add(embeddingId);
|
|
||||||
createLibraryDocSegment(textSegment, libraryDocId, tokenUsage, embeddingId);
|
|
||||||
});
|
|
||||||
} catch (Exception e) {
|
|
||||||
log.error(
|
|
||||||
"文档采集失败。libraryId {} objectName {} originalName {}",
|
|
||||||
libraryId,
|
|
||||||
objectName,
|
|
||||||
originalName,
|
|
||||||
e);
|
|
||||||
if (CollectionUtils.isNotEmpty(embeddingIds)) {
|
|
||||||
zhiPuEmbeddingStore.removeAll(embeddingIds);
|
|
||||||
}
|
|
||||||
throw e;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void createLibraryDocSegment(
|
|
||||||
TextSegment textSegment, Long libraryDocId, Integer tokenUsage, String embeddingId) {
|
|
||||||
LibraryDocSegment libraryDocSegment = new LibraryDocSegment();
|
|
||||||
libraryDocSegment.setDocId(libraryDocId);
|
|
||||||
libraryDocSegment.setContent(textSegment.text());
|
|
||||||
libraryDocSegment.setTokenUsage(tokenUsage);
|
|
||||||
libraryDocSegment.setEmbeddingId(embeddingId);
|
|
||||||
libraryDocSegmentDao.insert();
|
|
||||||
}
|
|
||||||
|
|
||||||
private Long createLibraryDoc(String objectName, String originalName, Map meta)
|
|
||||||
throws JsonProcessingException {
|
throws JsonProcessingException {
|
||||||
|
String username = SecurityContextHolder.getContext().getAuthentication().getName();
|
||||||
String identify =
|
String identify =
|
||||||
String.format(
|
String.format(
|
||||||
"%d%s_%s",
|
"%d%s_%s",
|
||||||
Instant.now().toEpochMilli(),
|
Instant.now().toEpochMilli(),
|
||||||
RandomStringUtils.insecure().nextAlphabetic(6),
|
RandomStringUtils.insecure().nextAlphabetic(6),
|
||||||
originalName);
|
originalName);
|
||||||
|
Map<String, String> meta = new HashMap<>();
|
||||||
|
meta.put("uploader", username);
|
||||||
LibraryDoc libraryDoc = new LibraryDoc();
|
LibraryDoc libraryDoc = new LibraryDoc();
|
||||||
ObjectMapper objectMapper = new ObjectMapper();
|
ObjectMapper objectMapper = new ObjectMapper();
|
||||||
String metaJson = objectMapper.writeValueAsString(meta);
|
String metaJson = objectMapper.writeValueAsString(meta);
|
||||||
@@ -142,10 +99,39 @@ public class RagService {
|
|||||||
libraryDoc.setPath(objectName);
|
libraryDoc.setPath(objectName);
|
||||||
libraryDoc.setName(originalName);
|
libraryDoc.setName(originalName);
|
||||||
libraryDoc.setIdentify(identify);
|
libraryDoc.setIdentify(identify);
|
||||||
|
libraryDoc.setLibId(libraryId);
|
||||||
|
libraryDoc.setStatus(LibraryDocStatusEnum.INDEXING);
|
||||||
|
libraryDoc.setEnable(Boolean.TRUE);
|
||||||
libraryDocRepository.insert(libraryDoc);
|
libraryDocRepository.insert(libraryDoc);
|
||||||
return libraryDocRepository.fetchOneByIdentify(identify).getId();
|
return libraryDocRepository.fetchOneByIdentify(identify).getId();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Async
|
||||||
|
public void embeddingAndCreateDocSegment(Long libraryDocId, String objectName) {
|
||||||
|
Document document =
|
||||||
|
amazonS3DocumentLoader.loadDocument(
|
||||||
|
minIoConfig.getDefaultBucket(), objectName, new ApacheTikaDocumentParser());
|
||||||
|
List<LibraryDocSegment> libraryDocSegments = new ArrayList<>();
|
||||||
|
DocumentByParagraphSplitter documentByParagraphSplitter =
|
||||||
|
new DocumentByParagraphSplitter(1000, 200);
|
||||||
|
documentByParagraphSplitter
|
||||||
|
.split(document)
|
||||||
|
.forEach(
|
||||||
|
textSegment -> {
|
||||||
|
Response<Embedding> embed = zhipuEmbeddingModel.embed(textSegment);
|
||||||
|
Integer tokenUsage = embed.tokenUsage().totalTokenCount();
|
||||||
|
Embedding vector = embed.content();
|
||||||
|
String embeddingId = zhiPuEmbeddingStore.add(vector, textSegment);
|
||||||
|
LibraryDocSegment libraryDocSegment = new LibraryDocSegment();
|
||||||
|
libraryDocSegment.setEmbeddingId(embeddingId);
|
||||||
|
libraryDocSegment.setContent(textSegment.text());
|
||||||
|
libraryDocSegment.setTokenUsage(tokenUsage);
|
||||||
|
libraryDocSegment.setDocId(libraryDocId);
|
||||||
|
libraryDocSegments.add(libraryDocSegment);
|
||||||
|
});
|
||||||
|
libraryDocSegmentDao.insert(libraryDocSegments);
|
||||||
|
}
|
||||||
|
|
||||||
public Map<String, String> searchAction(String message) {
|
public Map<String, String> searchAction(String message) {
|
||||||
Map<String, String> result = new HashMap<>();
|
Map<String, String> result = new HashMap<>();
|
||||||
EmbeddingSearchRequest embeddingSearchRequest =
|
EmbeddingSearchRequest embeddingSearchRequest =
|
||||||
|
|||||||
@@ -5,6 +5,11 @@ CREATE TABLE mjga.library (
|
|||||||
create_time TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP
|
create_time TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||||||
);
|
);
|
||||||
|
|
||||||
|
CREATE TYPE mjga.library_doc_status_enum AS ENUM (
|
||||||
|
'SUCCESS',
|
||||||
|
'INDEXING'
|
||||||
|
);
|
||||||
|
|
||||||
CREATE TABLE mjga.library_doc (
|
CREATE TABLE mjga.library_doc (
|
||||||
id BIGSERIAL PRIMARY KEY,
|
id BIGSERIAL PRIMARY KEY,
|
||||||
lib_id BIGINT NOT NULL,
|
lib_id BIGINT NOT NULL,
|
||||||
@@ -13,6 +18,7 @@ CREATE TABLE mjga.library_doc (
|
|||||||
path VARCHAR NOT NULL,
|
path VARCHAR NOT NULL,
|
||||||
meta JSON NOT NULL,
|
meta JSON NOT NULL,
|
||||||
enable BOOLEAN NOT NULL DEFAULT true,
|
enable BOOLEAN NOT NULL DEFAULT true,
|
||||||
|
status mjga.library_doc_status_enum NOT NULL,
|
||||||
create_time TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
create_time TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
update_time TIMESTAMPTZ,
|
update_time TIMESTAMPTZ,
|
||||||
FOREIGN KEY (lib_id) REFERENCES mjga.library (id) ON DELETE CASCADE
|
FOREIGN KEY (lib_id) REFERENCES mjga.library (id) ON DELETE CASCADE
|
||||||
|
|||||||
Reference in New Issue
Block a user