From 4d70b49e615f2486e11da3f35af938e324097dbe Mon Sep 17 00:00:00 2001 From: Chuck1sn Date: Fri, 27 Jun 2025 18:47:17 +0800 Subject: [PATCH] fix parse bugs --- .../com/zl/mjga/controller/LibraryController.java | 7 ++++--- .../src/main/java/com/zl/mjga/service/RagService.java | 11 ++++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/backend/src/main/java/com/zl/mjga/controller/LibraryController.java b/backend/src/main/java/com/zl/mjga/controller/LibraryController.java index 618b9f6..0adc032 100644 --- a/backend/src/main/java/com/zl/mjga/controller/LibraryController.java +++ b/backend/src/main/java/com/zl/mjga/controller/LibraryController.java @@ -34,13 +34,14 @@ public class LibraryController { @GetMapping("/libraries") public List queryLibraries() { - return libraryRepository.findAll(); + return libraryRepository.findAll().stream().sorted( + Comparator.comparing(Library::getId).reversed() + ).toList(); } @GetMapping("/docs") public List queryLibraryDocs(@RequestParam Long libraryId) { - List libraryDocs = libraryDocRepository.fetchByLibId(libraryId); - return libraryDocs.stream().sorted( + return libraryDocRepository.fetchByLibId(libraryId).stream().sorted( Comparator.comparing(LibraryDoc::getId).reversed() ).toList(); } diff --git a/backend/src/main/java/com/zl/mjga/service/RagService.java b/backend/src/main/java/com/zl/mjga/service/RagService.java index fd0ea2b..ed46275 100644 --- a/backend/src/main/java/com/zl/mjga/service/RagService.java +++ b/backend/src/main/java/com/zl/mjga/service/RagService.java @@ -50,6 +50,8 @@ public class RagService { private final EmbeddingStore zhiPuEmbeddingStore; + private final EmbeddingStore zhiPuLibraryEmbeddingStore; + private final ZhiPuEmbeddingModelConfig zhiPuEmbeddingModelConfig; private final AmazonS3DocumentLoader amazonS3DocumentLoader; @@ -76,7 +78,7 @@ public class RagService { List embeddingIdList = libraryDocSegments.stream().map(LibraryDocSegment::getEmbeddingId).toList(); if (CollectionUtils.isNotEmpty(embeddingIdList)) { - zhiPuEmbeddingStore.removeAll(embeddingIdList); + zhiPuLibraryEmbeddingStore.removeAll(embeddingIdList); } libraryDocRepository.deleteById(docId); } @@ -113,7 +115,7 @@ public class RagService { minIoConfig.getDefaultBucket(), objectName, new ApacheTikaDocumentParser()); List libraryDocSegments = new ArrayList<>(); DocumentByParagraphSplitter documentByParagraphSplitter = - new DocumentByParagraphSplitter(1000, 200); + new DocumentByParagraphSplitter(500, 150); documentByParagraphSplitter .split(document) .forEach( @@ -121,7 +123,7 @@ public class RagService { Response embed = zhipuEmbeddingModel.embed(textSegment); Integer tokenUsage = embed.tokenUsage().totalTokenCount(); Embedding vector = embed.content(); - String embeddingId = zhiPuEmbeddingStore.add(vector, textSegment); + String embeddingId = zhiPuLibraryEmbeddingStore.add(vector, textSegment); LibraryDocSegment libraryDocSegment = new LibraryDocSegment(); libraryDocSegment.setEmbeddingId(embeddingId); libraryDocSegment.setContent(textSegment.text()); @@ -130,6 +132,9 @@ public class RagService { libraryDocSegments.add(libraryDocSegment); }); libraryDocSegmentDao.insert(libraryDocSegments); + LibraryDoc libraryDoc = libraryDocRepository.fetchOneById(libraryDocId); + libraryDoc.setStatus(LibraryDocStatusEnum.SUCCESS); + libraryDocRepository.update(libraryDoc); } public Map searchAction(String message) {