From 5494181ae029caf7f2eeb6ecefb60f040ccd5f25 Mon Sep 17 00:00:00 2001 From: Chuck1sn Date: Thu, 26 Jun 2025 15:54:38 +0800 Subject: [PATCH] init library --- backend/build.gradle.kts | 7 +- .../mjga/config/ai/ChatModelInitializer.java | 12 +- .../mjga/config/ai/EmbeddingInitializer.java | 10 - .../com/zl/mjga/controller/AiController.java | 20 +- .../zl/mjga/controller/LibraryController.java | 66 ++++++ .../mjga/dto/library/LibraryDocUpdateDto.java | 6 + .../zl/mjga/dto/library/LibraryUpsertDto.java | 5 + .../mjga/repository/LibraryDocRepository.java | 14 ++ .../LibraryDocSegmentRepository.java | 14 ++ .../zl/mjga/repository/LibraryRepository.java | 15 ++ .../com/zl/mjga/service/EmbeddingService.java | 92 --------- .../java/com/zl/mjga/service/RagService.java | 189 ++++++++++++++++++ .../com/zl/mjga/service/UploadService.java | 2 +- .../db/migration/V1_0_0__init_table.sql | 14 +- .../db/migration/V1_0_3__init_library.sql | 28 +++ .../db/migration/test/V1_0_0__init_table.sql | 14 +- .../migration/test/V1_0_3__init_library.sql | 28 +++ 17 files changed, 394 insertions(+), 142 deletions(-) create mode 100644 backend/src/main/java/com/zl/mjga/controller/LibraryController.java create mode 100644 backend/src/main/java/com/zl/mjga/dto/library/LibraryDocUpdateDto.java create mode 100644 backend/src/main/java/com/zl/mjga/dto/library/LibraryUpsertDto.java create mode 100644 backend/src/main/java/com/zl/mjga/repository/LibraryDocRepository.java create mode 100644 backend/src/main/java/com/zl/mjga/repository/LibraryDocSegmentRepository.java create mode 100644 backend/src/main/java/com/zl/mjga/repository/LibraryRepository.java delete mode 100644 backend/src/main/java/com/zl/mjga/service/EmbeddingService.java create mode 100644 backend/src/main/java/com/zl/mjga/service/RagService.java create mode 100644 backend/src/main/resources/db/migration/V1_0_3__init_library.sql create mode 100644 backend/src/test/resources/db/migration/test/V1_0_3__init_library.sql diff --git 
a/backend/build.gradle.kts b/backend/build.gradle.kts index 4464b35..5b872e7 100644 --- a/backend/build.gradle.kts +++ b/backend/build.gradle.kts @@ -64,7 +64,7 @@ dependencies { implementation("dev.langchain4j:langchain4j-open-ai:1.0.0") implementation("dev.langchain4j:langchain4j-pgvector:1.0.1-beta6") implementation("dev.langchain4j:langchain4j-community-zhipu-ai:1.0.1-beta6") - implementation("dev.langchain4j:langchain4j-easy-rag:1.1.0-beta7") + implementation("dev.langchain4j:langchain4j-document-parser-apache-tika:1.1.0-beta7") implementation("dev.langchain4j:langchain4j-document-loader-amazon-s3:1.1.0-beta7") implementation("io.projectreactor:reactor-core:3.7.6") testImplementation("org.testcontainers:junit-jupiter:$testcontainersVersion") @@ -169,11 +169,6 @@ jooq { } } forcedTypes { - forcedType { - name = "varchar" - includeExpression = ".*" - includeTypes = "JSONB?" - } forcedType { name = "varchar" includeExpression = ".*" diff --git a/backend/src/main/java/com/zl/mjga/config/ai/ChatModelInitializer.java b/backend/src/main/java/com/zl/mjga/config/ai/ChatModelInitializer.java index ea27d5f..4c9727d 100644 --- a/backend/src/main/java/com/zl/mjga/config/ai/ChatModelInitializer.java +++ b/backend/src/main/java/com/zl/mjga/config/ai/ChatModelInitializer.java @@ -5,6 +5,7 @@ import com.zl.mjga.service.LlmService; import dev.langchain4j.community.model.zhipu.ZhipuAiStreamingChatModel; import dev.langchain4j.data.segment.TextSegment; import dev.langchain4j.memory.chat.MessageWindowChatMemory; +import dev.langchain4j.model.embedding.EmbeddingModel; import dev.langchain4j.model.openai.OpenAiStreamingChatModel; import dev.langchain4j.rag.content.retriever.EmbeddingStoreContentRetriever; import dev.langchain4j.service.AiServices; @@ -59,12 +60,19 @@ public class ChatModelInitializer { @DependsOn("flywayInitializer") public AiChatAssistant zhiPuChatAssistant( ZhipuAiStreamingChatModel zhipuChatModel, - EmbeddingStore zhiPuLibraryEmbeddingStore) { + EmbeddingStore 
zhiPuLibraryEmbeddingStore, + EmbeddingModel zhipuEmbeddingModel) { return AiServices.builder(AiChatAssistant.class) .streamingChatModel(zhipuChatModel) .systemMessageProvider(chatMemoryId -> promptConfiguration.getSystem()) .chatMemoryProvider(memoryId -> MessageWindowChatMemory.withMaxMessages(10)) - .contentRetriever(EmbeddingStoreContentRetriever.from(zhiPuLibraryEmbeddingStore)) + .contentRetriever( + EmbeddingStoreContentRetriever.builder() + .embeddingStore(zhiPuLibraryEmbeddingStore) + .embeddingModel(zhipuEmbeddingModel) + .minScore(0.75) + .maxResults(5) + .build()) .build(); } diff --git a/backend/src/main/java/com/zl/mjga/config/ai/EmbeddingInitializer.java b/backend/src/main/java/com/zl/mjga/config/ai/EmbeddingInitializer.java index 891b63d..79aefd0 100644 --- a/backend/src/main/java/com/zl/mjga/config/ai/EmbeddingInitializer.java +++ b/backend/src/main/java/com/zl/mjga/config/ai/EmbeddingInitializer.java @@ -8,7 +8,6 @@ import dev.langchain4j.data.document.loader.amazon.s3.AwsCredentials; import dev.langchain4j.data.segment.TextSegment; import dev.langchain4j.model.embedding.EmbeddingModel; import dev.langchain4j.store.embedding.EmbeddingStore; -import dev.langchain4j.store.embedding.EmbeddingStoreIngestor; import dev.langchain4j.store.embedding.pgvector.PgVectorEmbeddingStore; import jakarta.annotation.Resource; import lombok.RequiredArgsConstructor; @@ -75,15 +74,6 @@ public class EmbeddingInitializer { .build(); } - @Bean - public EmbeddingStoreIngestor zhipuEmbeddingStoreIngestor( - EmbeddingStore zhiPuLibraryEmbeddingStore, EmbeddingModel zhipuEmbeddingModel) { - return EmbeddingStoreIngestor.builder() - .embeddingModel(zhipuEmbeddingModel) - .embeddingStore(zhiPuLibraryEmbeddingStore) - .build(); - } - @Bean public AmazonS3DocumentLoader amazonS3DocumentLoader(MinIoConfig minIoConfig) { return AmazonS3DocumentLoader.builder() diff --git a/backend/src/main/java/com/zl/mjga/controller/AiController.java 
b/backend/src/main/java/com/zl/mjga/controller/AiController.java index 969a035..df74e09 100644 --- a/backend/src/main/java/com/zl/mjga/controller/AiController.java +++ b/backend/src/main/java/com/zl/mjga/controller/AiController.java @@ -7,9 +7,8 @@ import com.zl.mjga.dto.ai.LlmVm; import com.zl.mjga.exception.BusinessException; import com.zl.mjga.repository.*; import com.zl.mjga.service.AiChatService; -import com.zl.mjga.service.EmbeddingService; import com.zl.mjga.service.LlmService; -import com.zl.mjga.service.UploadService; +import com.zl.mjga.service.RagService; import dev.langchain4j.service.TokenStream; import jakarta.validation.Valid; import java.security.Principal; @@ -25,7 +24,6 @@ import org.springframework.http.HttpStatus; import org.springframework.http.MediaType; import org.springframework.security.access.prepost.PreAuthorize; import org.springframework.web.bind.annotation.*; -import org.springframework.web.multipart.MultipartFile; import reactor.core.publisher.Flux; import reactor.core.publisher.Sinks; @@ -37,14 +35,13 @@ public class AiController { private final AiChatService aiChatService; private final LlmService llmService; - private final EmbeddingService embeddingService; + private final RagService ragService; private final UserRepository userRepository; private final DepartmentRepository departmentRepository; private final PositionRepository positionRepository; private final RoleRepository repository; private final PermissionRepository permissionRepository; private final RoleRepository roleRepository; - private final UploadService uploadService; @PostMapping(value = "/action/execute", produces = MediaType.TEXT_EVENT_STREAM_VALUE) public Flux actionExecute(Principal principal, @RequestBody String userMessage) { @@ -112,7 +109,7 @@ public class AiController { if (!aiLlmConfig.getEnable()) { throw new BusinessException("命令模型未启用,请开启后再试。"); } - return embeddingService.searchAction(message); + return ragService.searchAction(message); } 
@PreAuthorize("hasAuthority(T(com.zl.mjga.model.urp.EPermission).WRITE_USER_ROLE_PERMISSION)") @@ -172,15 +169,4 @@ public class AiController { void createNewConversation(Principal principal) { aiChatService.evictChatMemory(principal.getName()); } - - @PostMapping( - value = "/library/upload", - consumes = MediaType.MULTIPART_FORM_DATA_VALUE, - produces = MediaType.TEXT_PLAIN_VALUE) - public String uploadLibraryFile(@RequestPart("file") MultipartFile multipartFile) - throws Exception { - String objectName = uploadService.uploadLibraryFile(multipartFile); - embeddingService.ingestDocument(objectName); - return objectName; - } } diff --git a/backend/src/main/java/com/zl/mjga/controller/LibraryController.java b/backend/src/main/java/com/zl/mjga/controller/LibraryController.java new file mode 100644 index 0000000..b8e2b4d --- /dev/null +++ b/backend/src/main/java/com/zl/mjga/controller/LibraryController.java @@ -0,0 +1,66 @@ +package com.zl.mjga.controller; + +import com.zl.mjga.dto.library.LibraryDocUpdateDto; +import com.zl.mjga.dto.library.LibraryUpsertDto; +import com.zl.mjga.repository.LibraryDocRepository; +import com.zl.mjga.repository.LibraryRepository; +import com.zl.mjga.service.RagService; +import com.zl.mjga.service.UploadService; +import jakarta.validation.Valid; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.jooq.generated.mjga.tables.pojos.Library; +import org.jooq.generated.mjga.tables.pojos.LibraryDoc; +import org.springframework.http.MediaType; +import org.springframework.web.bind.annotation.*; +import org.springframework.web.multipart.MultipartFile; + +@RestController +@RequestMapping("/library") +@RequiredArgsConstructor +@Slf4j +public class LibraryController { + + private final UploadService uploadService; + private final RagService ragService; + private final LibraryRepository libraryRepository; + private final LibraryDocRepository libraryDocRepository; + + @PostMapping("/upsert") + public void 
upsertLibrary(@RequestBody @Valid LibraryUpsertDto libraryUpsertDto) {
+    Library library = new Library();
+    library.setId(libraryUpsertDto.id());
+    library.setName(libraryUpsertDto.name());
+    libraryRepository.merge(library);
+  }
+
+  @DeleteMapping
+  public void deleteLibrary(@RequestParam Long libraryId) {
+    ragService.deleteLibraryBy(libraryId);
+  }
+
+  @DeleteMapping("/doc")
+  public void deleteLibraryDoc(@RequestParam Long libraryDocId) {
+    ragService.deleteDocBy(libraryDocId);
+  }
+
+  @PutMapping("/doc")
+  public void updateLibraryDoc(@RequestBody @Valid LibraryDocUpdateDto libraryDocUpdateDto) {
+    LibraryDoc libraryDoc = new LibraryDoc();
+    libraryDoc.setId(libraryDocUpdateDto.id());
+    libraryDoc.setEnable(libraryDocUpdateDto.enable());
+    libraryDocRepository.merge(libraryDoc);
+  }
+
+  @PostMapping(
+      value = "/upload",
+      consumes = MediaType.MULTIPART_FORM_DATA_VALUE,
+      produces = MediaType.TEXT_PLAIN_VALUE)
+  public String uploadLibraryDoc(
+      @RequestPart("libraryId") Long libraryId, @RequestPart("file") MultipartFile multipartFile)
+      throws Exception {
+    String objectName = uploadService.uploadLibraryDoc(multipartFile);
+    ragService.ingestDocumentBy(libraryId, objectName, multipartFile.getOriginalFilename());
+    return objectName;
+  }
+}
diff --git a/backend/src/main/java/com/zl/mjga/dto/library/LibraryDocUpdateDto.java b/backend/src/main/java/com/zl/mjga/dto/library/LibraryDocUpdateDto.java
new file mode 100644
index 0000000..18a59cb
--- /dev/null
+++ b/backend/src/main/java/com/zl/mjga/dto/library/LibraryDocUpdateDto.java
@@ -0,0 +1,6 @@
+package com.zl.mjga.dto.library;
+
+import jakarta.validation.constraints.NotNull;
+
+/** Body of PUT /library/doc: the document id and its new enable flag (both required). */
+public record LibraryDocUpdateDto(@NotNull Long id, @NotNull Boolean enable) {}
diff --git a/backend/src/main/java/com/zl/mjga/dto/library/LibraryUpsertDto.java b/backend/src/main/java/com/zl/mjga/dto/library/LibraryUpsertDto.java
new file mode 100644
index 0000000..67636dc
--- /dev/null
+++ b/backend/src/main/java/com/zl/mjga/dto/library/LibraryUpsertDto.java @@ -0,0 +1,5 @@ +package com.zl.mjga.dto.library; + +import jakarta.validation.constraints.NotEmpty; + +public record LibraryUpsertDto(Long id, @NotEmpty String name) {} diff --git a/backend/src/main/java/com/zl/mjga/repository/LibraryDocRepository.java b/backend/src/main/java/com/zl/mjga/repository/LibraryDocRepository.java new file mode 100644 index 0000000..8e7deb5 --- /dev/null +++ b/backend/src/main/java/com/zl/mjga/repository/LibraryDocRepository.java @@ -0,0 +1,14 @@ +package com.zl.mjga.repository; + +import org.jooq.Configuration; +import org.jooq.generated.mjga.tables.daos.LibraryDocDao; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Repository; + +@Repository +public class LibraryDocRepository extends LibraryDocDao { + @Autowired + public LibraryDocRepository(Configuration configuration) { + super(configuration); + } +} diff --git a/backend/src/main/java/com/zl/mjga/repository/LibraryDocSegmentRepository.java b/backend/src/main/java/com/zl/mjga/repository/LibraryDocSegmentRepository.java new file mode 100644 index 0000000..1afd329 --- /dev/null +++ b/backend/src/main/java/com/zl/mjga/repository/LibraryDocSegmentRepository.java @@ -0,0 +1,14 @@ +package com.zl.mjga.repository; + +import org.jooq.Configuration; +import org.jooq.generated.mjga.tables.daos.LibraryDocSegmentDao; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Repository; + +@Repository +public class LibraryDocSegmentRepository extends LibraryDocSegmentDao { + @Autowired + public LibraryDocSegmentRepository(Configuration configuration) { + super(configuration); + } +} diff --git a/backend/src/main/java/com/zl/mjga/repository/LibraryRepository.java b/backend/src/main/java/com/zl/mjga/repository/LibraryRepository.java new file mode 100644 index 0000000..31e039d --- /dev/null +++ 
b/backend/src/main/java/com/zl/mjga/repository/LibraryRepository.java @@ -0,0 +1,15 @@ +package com.zl.mjga.repository; + +import org.jooq.Configuration; +import org.jooq.generated.mjga.tables.daos.LibraryDao; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.stereotype.Repository; + +@Repository +public class LibraryRepository extends LibraryDao { + + @Autowired + public LibraryRepository(Configuration configuration) { + super(configuration); + } +} diff --git a/backend/src/main/java/com/zl/mjga/service/EmbeddingService.java b/backend/src/main/java/com/zl/mjga/service/EmbeddingService.java deleted file mode 100644 index 8c9eaaa..0000000 --- a/backend/src/main/java/com/zl/mjga/service/EmbeddingService.java +++ /dev/null @@ -1,92 +0,0 @@ -package com.zl.mjga.service; - -import static dev.langchain4j.store.embedding.filter.MetadataFilterBuilder.metadataKey; - -import com.zl.mjga.config.ai.ZhiPuEmbeddingModelConfig; -import com.zl.mjga.config.minio.MinIoConfig; -import com.zl.mjga.model.urp.Actions; -import dev.langchain4j.data.document.Document; -import dev.langchain4j.data.document.Metadata; -import dev.langchain4j.data.document.loader.amazon.s3.AmazonS3DocumentLoader; -import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser; -import dev.langchain4j.data.embedding.Embedding; -import dev.langchain4j.data.segment.TextSegment; -import dev.langchain4j.model.embedding.EmbeddingModel; -import dev.langchain4j.store.embedding.*; -import dev.langchain4j.store.embedding.filter.Filter; -import io.minio.errors.*; -import jakarta.annotation.PostConstruct; -import java.util.HashMap; -import java.util.Map; -import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; -import org.springframework.context.annotation.Configuration; -import org.springframework.stereotype.Service; - -@Configuration -@RequiredArgsConstructor -@Service -@Slf4j -public class EmbeddingService { - - private final EmbeddingModel 
zhipuEmbeddingModel; - - private final EmbeddingStore zhiPuEmbeddingStore; - - private final ZhiPuEmbeddingModelConfig zhiPuEmbeddingModelConfig; - - private final AmazonS3DocumentLoader amazonS3DocumentLoader; - - private final EmbeddingStoreIngestor zhiPuEmbeddingStoreIngestor; - - private final MinIoConfig minIoConfig; - - public void ingestDocument(String objectName) { - Document document = - amazonS3DocumentLoader.loadDocument( - minIoConfig.getDefaultBucket(), objectName, new ApacheTikaDocumentParser()); - IngestionResult ingest = zhiPuEmbeddingStoreIngestor.ingest(document); - log.info("Ingest document finished {}", ingest); - } - - public Map searchAction(String message) { - Map result = new HashMap<>(); - EmbeddingSearchRequest embeddingSearchRequest = - EmbeddingSearchRequest.builder() - .queryEmbedding(zhipuEmbeddingModel.embed(message).content()) - .minScore(0.89) - .build(); - EmbeddingSearchResult embeddingSearchResult = - zhiPuEmbeddingStore.search(embeddingSearchRequest); - if (!embeddingSearchResult.matches().isEmpty()) { - Metadata metadata = embeddingSearchResult.matches().getFirst().embedded().metadata(); - result.put(Actions.INDEX_KEY, metadata.getString(Actions.INDEX_KEY)); - } - return result; - } - - @PostConstruct - public void initActionIndex() { - if (!zhiPuEmbeddingModelConfig.getEnable()) { - return; - } - for (Actions action : Actions.values()) { - Embedding queryEmbedding = zhipuEmbeddingModel.embed(action.getContent()).content(); - Filter createUserFilter = metadataKey(Actions.INDEX_KEY).isEqualTo(action.getCode()); - EmbeddingSearchRequest embeddingSearchRequest = - EmbeddingSearchRequest.builder() - .queryEmbedding(queryEmbedding) - .filter(createUserFilter) - .build(); - EmbeddingSearchResult embeddingSearchResult = - zhiPuEmbeddingStore.search(embeddingSearchRequest); - if (embeddingSearchResult.matches().isEmpty()) { - TextSegment segment = - TextSegment.from( - action.getContent(), Metadata.metadata(Actions.INDEX_KEY, 
action.getCode())); - Embedding embedding = zhipuEmbeddingModel.embed(segment).content(); - zhiPuEmbeddingStore.add(embedding, segment); - } - } - } -} diff --git a/backend/src/main/java/com/zl/mjga/service/RagService.java b/backend/src/main/java/com/zl/mjga/service/RagService.java new file mode 100644 index 0000000..22eff95 --- /dev/null +++ b/backend/src/main/java/com/zl/mjga/service/RagService.java @@ -0,0 +1,189 @@ +package com.zl.mjga.service; + +import static dev.langchain4j.store.embedding.filter.MetadataFilterBuilder.metadataKey; + +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.zl.mjga.config.ai.ZhiPuEmbeddingModelConfig; +import com.zl.mjga.config.minio.MinIoConfig; +import com.zl.mjga.model.urp.Actions; +import com.zl.mjga.repository.LibraryDocRepository; +import com.zl.mjga.repository.LibraryRepository; +import dev.langchain4j.data.document.Document; +import dev.langchain4j.data.document.Metadata; +import dev.langchain4j.data.document.loader.amazon.s3.AmazonS3DocumentLoader; +import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser; +import dev.langchain4j.data.document.splitter.DocumentByParagraphSplitter; +import dev.langchain4j.data.embedding.Embedding; +import dev.langchain4j.data.segment.TextSegment; +import dev.langchain4j.model.embedding.EmbeddingModel; +import dev.langchain4j.model.output.Response; +import dev.langchain4j.store.embedding.*; +import dev.langchain4j.store.embedding.filter.Filter; +import jakarta.annotation.PostConstruct; +import java.time.Instant; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.collections4.CollectionUtils; +import org.apache.commons.lang3.RandomStringUtils; +import org.jooq.JSON; +import org.jooq.generated.mjga.tables.daos.LibraryDocSegmentDao; +import 
org.jooq.generated.mjga.tables.pojos.LibraryDoc;
+import org.jooq.generated.mjga.tables.pojos.LibraryDocSegment;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.stereotype.Service;
+import org.springframework.transaction.annotation.Transactional;
+
+/** Ingests and deletes library documents with their embeddings; also indexes actions. */
+@Configuration
+@RequiredArgsConstructor
+@Service
+@Slf4j
+public class RagService {
+
+  private final EmbeddingModel zhipuEmbeddingModel;
+  private final EmbeddingStore<TextSegment> zhiPuEmbeddingStore;
+  private final ZhiPuEmbeddingModelConfig zhiPuEmbeddingModelConfig;
+  private final AmazonS3DocumentLoader amazonS3DocumentLoader;
+  private final MinIoConfig minIoConfig;
+  private final LibraryRepository libraryRepository;
+  private final LibraryDocRepository libraryDocRepository;
+  private final LibraryDocSegmentDao libraryDocSegmentDao;
+
+  /** Deletes every document of the library (rows and embeddings), then the library itself. */
+  public void deleteLibraryBy(Long libraryId) {
+    List<LibraryDoc> libraryDocs = libraryDocRepository.fetchByLibId(libraryId);
+    List<Long> docIds = libraryDocs.stream().map(LibraryDoc::getId).toList();
+    for (Long docId : docIds) {
+      deleteDocBy(docId);
+    }
+    libraryRepository.deleteById(libraryId);
+  }
+
+  /** Removes the document's embeddings from the store, then deletes the document row. */
+  public void deleteDocBy(Long docId) {
+    List<LibraryDocSegment> libraryDocSegments = libraryDocSegmentDao.fetchByDocId(docId);
+    List<String> embeddingIdList =
+        libraryDocSegments.stream().map(LibraryDocSegment::getEmbeddingId).toList();
+    if (CollectionUtils.isNotEmpty(embeddingIdList)) {
+      zhiPuEmbeddingStore.removeAll(embeddingIdList);
+    }
+    libraryDocRepository.deleteById(docId);
+  }
+
+  /** Loads the object, records it under the library, then embeds and stores each segment. */
+  @Transactional(rollbackFor = Throwable.class)
+  public void ingestDocumentBy(Long libraryId, String objectName, String originalName)
+      throws Exception {
+    Document document =
+        amazonS3DocumentLoader.loadDocument(
+            minIoConfig.getDefaultBucket(), objectName, new ApacheTikaDocumentParser());
+    ArrayList<String> embeddingIds = new ArrayList<>();
+    try {
+      Long libraryDocId =
+          createLibraryDoc(libraryId, objectName, originalName, document.metadata().toMap());
+      new DocumentByParagraphSplitter(1000, 200)
+          .split(document)
+          .forEach(
+              textSegment -> {
+                Metadata metadata = textSegment.metadata();
+                metadata.put("libraryId", libraryId);
+                Response<Embedding> embed = zhipuEmbeddingModel.embed(textSegment);
+                Integer tokenUsage = embed.tokenUsage().totalTokenCount();
+                Embedding vector = embed.content();
+                String embeddingId = zhiPuEmbeddingStore.add(vector, textSegment);
+                embeddingIds.add(embeddingId);
+                createLibraryDocSegment(textSegment, libraryDocId, tokenUsage, embeddingId);
+              });
+    } catch (Exception e) {
+      log.error(
+          "文档采集失败。libraryId {} objectName {} originalName {}",
+          libraryId,
+          objectName,
+          originalName,
+          e);
+      // Undo the embeddings added so far before propagating the failure.
+      if (CollectionUtils.isNotEmpty(embeddingIds)) {
+        zhiPuEmbeddingStore.removeAll(embeddingIds);
+      }
+      throw e;
+    }
+  }
+
+  /** Persists one segment row that links the document to its embedding id. */
+  private void createLibraryDocSegment(
+      TextSegment textSegment, Long libraryDocId, Integer tokenUsage, String embeddingId) {
+    LibraryDocSegment libraryDocSegment = new LibraryDocSegment();
+    libraryDocSegment.setDocId(libraryDocId);
+    libraryDocSegment.setContent(textSegment.text());
+    libraryDocSegment.setTokenUsage(tokenUsage);
+    libraryDocSegment.setEmbeddingId(embeddingId);
+    libraryDocSegmentDao.insert(libraryDocSegment);
+  }
+
+  /** Inserts the document row for the given library and returns its generated id. */
+  private Long createLibraryDoc(
+      Long libraryId, String objectName, String originalName, Map<String, Object> meta)
+      throws JsonProcessingException {
+    String identify =
+        String.format(
+            "%d%s_%s",
+            Instant.now().toEpochMilli(),
+            RandomStringUtils.insecure().nextAlphabetic(6),
+            originalName);
+    LibraryDoc libraryDoc = new LibraryDoc();
+    String metaJson = new ObjectMapper().writeValueAsString(meta);
+    libraryDoc.setLibId(libraryId); // lib_id is NOT NULL; the insert fails without it
+    libraryDoc.setMeta(JSON.valueOf(metaJson));
+    libraryDoc.setPath(objectName);
+    libraryDoc.setName(originalName);
+    libraryDoc.setIdentify(identify);
+    libraryDocRepository.insert(libraryDoc);
+    return libraryDocRepository.fetchOneByIdentify(identify).getId();
+  }
+
+  public Map searchAction(String
message) { + Map result = new HashMap<>(); + EmbeddingSearchRequest embeddingSearchRequest = + EmbeddingSearchRequest.builder() + .queryEmbedding(zhipuEmbeddingModel.embed(message).content()) + .minScore(0.89) + .build(); + EmbeddingSearchResult embeddingSearchResult = + zhiPuEmbeddingStore.search(embeddingSearchRequest); + if (!embeddingSearchResult.matches().isEmpty()) { + Metadata metadata = embeddingSearchResult.matches().getFirst().embedded().metadata(); + result.put(Actions.INDEX_KEY, metadata.getString(Actions.INDEX_KEY)); + } + return result; + } + + @PostConstruct + public void initActionIndex() { + if (!zhiPuEmbeddingModelConfig.getEnable()) { + return; + } + for (Actions action : Actions.values()) { + Embedding queryEmbedding = zhipuEmbeddingModel.embed(action.getContent()).content(); + Filter createUserFilter = metadataKey(Actions.INDEX_KEY).isEqualTo(action.getCode()); + EmbeddingSearchRequest embeddingSearchRequest = + EmbeddingSearchRequest.builder() + .queryEmbedding(queryEmbedding) + .filter(createUserFilter) + .build(); + EmbeddingSearchResult embeddingSearchResult = + zhiPuEmbeddingStore.search(embeddingSearchRequest); + if (embeddingSearchResult.matches().isEmpty()) { + TextSegment segment = + TextSegment.from( + action.getContent(), Metadata.metadata(Actions.INDEX_KEY, action.getCode())); + Embedding embedding = zhipuEmbeddingModel.embed(segment).content(); + zhiPuEmbeddingStore.add(embedding, segment); + } + } + } +} diff --git a/backend/src/main/java/com/zl/mjga/service/UploadService.java b/backend/src/main/java/com/zl/mjga/service/UploadService.java index 654dfad..2f01f6e 100644 --- a/backend/src/main/java/com/zl/mjga/service/UploadService.java +++ b/backend/src/main/java/com/zl/mjga/service/UploadService.java @@ -58,7 +58,7 @@ public class UploadService { return objectName; } - public String uploadLibraryFile(MultipartFile multipartFile) throws Exception { + public String uploadLibraryDoc(MultipartFile multipartFile) throws Exception { 
String originalFilename = multipartFile.getOriginalFilename(); if (StringUtils.isEmpty(originalFilename)) { throw new BusinessException("文件名不能为空"); diff --git a/backend/src/main/resources/db/migration/V1_0_0__init_table.sql b/backend/src/main/resources/db/migration/V1_0_0__init_table.sql index e76556f..2eb34b4 100644 --- a/backend/src/main/resources/db/migration/V1_0_0__init_table.sql +++ b/backend/src/main/resources/db/migration/V1_0_0__init_table.sql @@ -4,7 +4,7 @@ CREATE TABLE mjga.user ( id BIGSERIAL PRIMARY KEY, username VARCHAR NOT NULL UNIQUE, avatar VARCHAR, - create_time TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP, + create_time TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP, password VARCHAR NOT NULL, enable BOOLEAN NOT NULL DEFAULT TRUE ); @@ -39,7 +39,7 @@ CREATE TABLE mjga.user_role_map ( CREATE TABLE mjga.department ( id BIGSERIAL PRIMARY KEY, - name VARCHAR(255) NOT NULL UNIQUE, + name VARCHAR NOT NULL UNIQUE, parent_id BIGINT, FOREIGN KEY (parent_id) REFERENCES mjga.department(id) @@ -56,7 +56,7 @@ CREATE TABLE mjga.user_department_map ( CREATE TABLE mjga.position ( id BIGSERIAL PRIMARY KEY, - name VARCHAR(255) NOT NULL UNIQUE + name VARCHAR NOT NULL UNIQUE ); CREATE TABLE mjga.user_position_map ( @@ -80,12 +80,12 @@ CREATE TYPE "llm_type_enum" AS ENUM ( CREATE TABLE mjga.ai_llm_config ( id BIGSERIAL NOT NULL UNIQUE, - name VARCHAR(255) NOT NULL UNIQUE, + name VARCHAR NOT NULL UNIQUE, code mjga.llm_code_enum NOT NULL UNIQUE, - model_name VARCHAR(255) NOT NULL, + model_name VARCHAR NOT NULL, type LLM_TYPE_ENUM NOT NULL, - api_key VARCHAR(255) NOT NULL, - url VARCHAR(255) NOT NULL, + api_key VARCHAR NOT NULL, + url VARCHAR NOT NULL, enable BOOLEAN NOT NULL DEFAULT true, priority SMALLINT NOT NULL DEFAULT 0, PRIMARY KEY(id) diff --git a/backend/src/main/resources/db/migration/V1_0_3__init_library.sql b/backend/src/main/resources/db/migration/V1_0_3__init_library.sql new file mode 100644 index 0000000..e1cdde8 --- /dev/null +++ 
b/backend/src/main/resources/db/migration/V1_0_3__init_library.sql @@ -0,0 +1,28 @@ +CREATE TABLE mjga.library ( + id BIGSERIAL PRIMARY KEY, + name VARCHAR NOT NULL UNIQUE, + data_count INTEGER NOT NULL DEFAULT 0, + create_time TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP +); + +CREATE TABLE mjga.library_doc ( + id BIGSERIAL PRIMARY KEY, + lib_id BIGINT NOT NULL, + name VARCHAR NOT NULL, + identify VARCHAR NOT NULL UNIQUE, + path VARCHAR NOT NULL, + meta JSON NOT NULL, + enable BOOLEAN NOT NULL DEFAULT true, + create_time TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP, + update_time TIMESTAMPTZ, + FOREIGN KEY (lib_id) REFERENCES mjga.library (id) ON DELETE CASCADE +); + +CREATE TABLE mjga.library_doc_segment ( + id BIGSERIAL PRIMARY KEY, + doc_id BIGINT NOT NULL, + embedding_id VARCHAR NOT NULL UNIQUE, + content TEXT, + token_usage INTEGER NOT NULL DEFAULT 0, + FOREIGN KEY (doc_id) REFERENCES mjga.library_doc (id) ON DELETE CASCADE +); \ No newline at end of file diff --git a/backend/src/test/resources/db/migration/test/V1_0_0__init_table.sql b/backend/src/test/resources/db/migration/test/V1_0_0__init_table.sql index a77b7de..daca81a 100644 --- a/backend/src/test/resources/db/migration/test/V1_0_0__init_table.sql +++ b/backend/src/test/resources/db/migration/test/V1_0_0__init_table.sql @@ -4,7 +4,7 @@ CREATE TABLE mjga.user ( id BIGSERIAL PRIMARY KEY, username VARCHAR NOT NULL UNIQUE, avatar VARCHAR, - create_time TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP, + create_time TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP, password VARCHAR NOT NULL, enable BOOLEAN NOT NULL DEFAULT TRUE ); @@ -39,7 +39,7 @@ CREATE TABLE mjga.user_role_map ( CREATE TABLE mjga.department ( id BIGSERIAL PRIMARY KEY, - name VARCHAR(255) NOT NULL UNIQUE, + name VARCHAR NOT NULL UNIQUE, parent_id BIGINT, FOREIGN KEY (parent_id) REFERENCES mjga.department(id) @@ -56,7 +56,7 @@ CREATE TABLE mjga.user_department_map ( CREATE TABLE mjga.position ( id BIGSERIAL PRIMARY KEY, - 
name VARCHAR(255) NOT NULL UNIQUE + name VARCHAR NOT NULL UNIQUE ); CREATE TABLE mjga.user_position_map ( @@ -80,12 +80,12 @@ CREATE TYPE "llm_type_enum" AS ENUM ( CREATE TABLE mjga.ai_llm_config ( id BIGSERIAL NOT NULL UNIQUE, - name VARCHAR(255) NOT NULL UNIQUE, + name VARCHAR NOT NULL UNIQUE, code mjga.llm_code_enum NOT NULL UNIQUE, - model_name VARCHAR(255) NOT NULL, + model_name VARCHAR NOT NULL, type LLM_TYPE_ENUM NOT NULL, - api_key VARCHAR(255) NOT NULL, - url VARCHAR(255) NOT NULL, + api_key VARCHAR NOT NULL, + url VARCHAR NOT NULL, enable BOOLEAN NOT NULL DEFAULT true, priority SMALLINT NOT NULL DEFAULT 0, PRIMARY KEY(id) diff --git a/backend/src/test/resources/db/migration/test/V1_0_3__init_library.sql b/backend/src/test/resources/db/migration/test/V1_0_3__init_library.sql new file mode 100644 index 0000000..e1cdde8 --- /dev/null +++ b/backend/src/test/resources/db/migration/test/V1_0_3__init_library.sql @@ -0,0 +1,28 @@ +CREATE TABLE mjga.library ( + id BIGSERIAL PRIMARY KEY, + name VARCHAR NOT NULL UNIQUE, + data_count INTEGER NOT NULL DEFAULT 0, + create_time TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP +); + +CREATE TABLE mjga.library_doc ( + id BIGSERIAL PRIMARY KEY, + lib_id BIGINT NOT NULL, + name VARCHAR NOT NULL, + identify VARCHAR NOT NULL UNIQUE, + path VARCHAR NOT NULL, + meta JSON NOT NULL, + enable BOOLEAN NOT NULL DEFAULT true, + create_time TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP, + update_time TIMESTAMPTZ, + FOREIGN KEY (lib_id) REFERENCES mjga.library (id) ON DELETE CASCADE +); + +CREATE TABLE mjga.library_doc_segment ( + id BIGSERIAL PRIMARY KEY, + doc_id BIGINT NOT NULL, + embedding_id VARCHAR NOT NULL UNIQUE, + content TEXT, + token_usage INTEGER NOT NULL DEFAULT 0, + FOREIGN KEY (doc_id) REFERENCES mjga.library_doc (id) ON DELETE CASCADE +); \ No newline at end of file