fix parse bugs

This commit is contained in:
Chuck1sn
2025-06-27 18:47:17 +08:00
parent 8ed0b795f3
commit 4d70b49e61
2 changed files with 12 additions and 6 deletions

View File

@@ -34,13 +34,14 @@ public class LibraryController {
 @GetMapping("/libraries")
 public List<Library> queryLibraries() {
-return libraryRepository.findAll();
+return libraryRepository.findAll().stream().sorted(
+Comparator.comparing(Library::getId).reversed()
+).toList();
 }
 @GetMapping("/docs")
 public List<LibraryDoc> queryLibraryDocs(@RequestParam Long libraryId) {
-List<LibraryDoc> libraryDocs = libraryDocRepository.fetchByLibId(libraryId);
-return libraryDocs.stream().sorted(
+return libraryDocRepository.fetchByLibId(libraryId).stream().sorted(
 Comparator.comparing(LibraryDoc::getId).reversed()
 ).toList();
 }

View File

@@ -50,6 +50,8 @@ public class RagService {
 private final EmbeddingStore<TextSegment> zhiPuEmbeddingStore;
+private final EmbeddingStore<TextSegment> zhiPuLibraryEmbeddingStore;
 private final ZhiPuEmbeddingModelConfig zhiPuEmbeddingModelConfig;
 private final AmazonS3DocumentLoader amazonS3DocumentLoader;
@@ -76,7 +78,7 @@ public class RagService {
 List<String> embeddingIdList =
 libraryDocSegments.stream().map(LibraryDocSegment::getEmbeddingId).toList();
 if (CollectionUtils.isNotEmpty(embeddingIdList)) {
-zhiPuEmbeddingStore.removeAll(embeddingIdList);
+zhiPuLibraryEmbeddingStore.removeAll(embeddingIdList);
 }
 libraryDocRepository.deleteById(docId);
 }
@@ -113,7 +115,7 @@ public class RagService {
 minIoConfig.getDefaultBucket(), objectName, new ApacheTikaDocumentParser());
 List<LibraryDocSegment> libraryDocSegments = new ArrayList<>();
 DocumentByParagraphSplitter documentByParagraphSplitter =
-new DocumentByParagraphSplitter(1000, 200);
+new DocumentByParagraphSplitter(500, 150);
 documentByParagraphSplitter
 .split(document)
 .forEach(
@@ -121,7 +123,7 @@ public class RagService {
 Response<Embedding> embed = zhipuEmbeddingModel.embed(textSegment);
 Integer tokenUsage = embed.tokenUsage().totalTokenCount();
 Embedding vector = embed.content();
-String embeddingId = zhiPuEmbeddingStore.add(vector, textSegment);
+String embeddingId = zhiPuLibraryEmbeddingStore.add(vector, textSegment);
 LibraryDocSegment libraryDocSegment = new LibraryDocSegment();
 libraryDocSegment.setEmbeddingId(embeddingId);
 libraryDocSegment.setContent(textSegment.text());
@@ -130,6 +132,9 @@ public class RagService {
 libraryDocSegments.add(libraryDocSegment);
 });
 libraryDocSegmentDao.insert(libraryDocSegments);
+LibraryDoc libraryDoc = libraryDocRepository.fetchOneById(libraryDocId);
+libraryDoc.setStatus(LibraryDocStatusEnum.SUCCESS);
+libraryDocRepository.update(libraryDoc);
 }
 public Map<String, String> searchAction(String message) {