mirror of
https://github.com/ccmjga/zhilu-admin
synced 2026-04-06 13:17:35 +00:00
init library
This commit is contained in:
@@ -64,7 +64,7 @@ dependencies {
|
|||||||
implementation("dev.langchain4j:langchain4j-open-ai:1.0.0")
|
implementation("dev.langchain4j:langchain4j-open-ai:1.0.0")
|
||||||
implementation("dev.langchain4j:langchain4j-pgvector:1.0.1-beta6")
|
implementation("dev.langchain4j:langchain4j-pgvector:1.0.1-beta6")
|
||||||
implementation("dev.langchain4j:langchain4j-community-zhipu-ai:1.0.1-beta6")
|
implementation("dev.langchain4j:langchain4j-community-zhipu-ai:1.0.1-beta6")
|
||||||
implementation("dev.langchain4j:langchain4j-easy-rag:1.1.0-beta7")
|
implementation("dev.langchain4j:langchain4j-document-parser-apache-tika:1.1.0-beta7")
|
||||||
implementation("dev.langchain4j:langchain4j-document-loader-amazon-s3:1.1.0-beta7")
|
implementation("dev.langchain4j:langchain4j-document-loader-amazon-s3:1.1.0-beta7")
|
||||||
implementation("io.projectreactor:reactor-core:3.7.6")
|
implementation("io.projectreactor:reactor-core:3.7.6")
|
||||||
testImplementation("org.testcontainers:junit-jupiter:$testcontainersVersion")
|
testImplementation("org.testcontainers:junit-jupiter:$testcontainersVersion")
|
||||||
@@ -169,11 +169,6 @@ jooq {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
forcedTypes {
|
forcedTypes {
|
||||||
forcedType {
|
|
||||||
name = "varchar"
|
|
||||||
includeExpression = ".*"
|
|
||||||
includeTypes = "JSONB?"
|
|
||||||
}
|
|
||||||
forcedType {
|
forcedType {
|
||||||
name = "varchar"
|
name = "varchar"
|
||||||
includeExpression = ".*"
|
includeExpression = ".*"
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import com.zl.mjga.service.LlmService;
|
|||||||
import dev.langchain4j.community.model.zhipu.ZhipuAiStreamingChatModel;
|
import dev.langchain4j.community.model.zhipu.ZhipuAiStreamingChatModel;
|
||||||
import dev.langchain4j.data.segment.TextSegment;
|
import dev.langchain4j.data.segment.TextSegment;
|
||||||
import dev.langchain4j.memory.chat.MessageWindowChatMemory;
|
import dev.langchain4j.memory.chat.MessageWindowChatMemory;
|
||||||
|
import dev.langchain4j.model.embedding.EmbeddingModel;
|
||||||
import dev.langchain4j.model.openai.OpenAiStreamingChatModel;
|
import dev.langchain4j.model.openai.OpenAiStreamingChatModel;
|
||||||
import dev.langchain4j.rag.content.retriever.EmbeddingStoreContentRetriever;
|
import dev.langchain4j.rag.content.retriever.EmbeddingStoreContentRetriever;
|
||||||
import dev.langchain4j.service.AiServices;
|
import dev.langchain4j.service.AiServices;
|
||||||
@@ -59,12 +60,19 @@ public class ChatModelInitializer {
|
|||||||
@DependsOn("flywayInitializer")
|
@DependsOn("flywayInitializer")
|
||||||
public AiChatAssistant zhiPuChatAssistant(
|
public AiChatAssistant zhiPuChatAssistant(
|
||||||
ZhipuAiStreamingChatModel zhipuChatModel,
|
ZhipuAiStreamingChatModel zhipuChatModel,
|
||||||
EmbeddingStore<TextSegment> zhiPuLibraryEmbeddingStore) {
|
EmbeddingStore<TextSegment> zhiPuLibraryEmbeddingStore,
|
||||||
|
EmbeddingModel zhipuEmbeddingModel) {
|
||||||
return AiServices.builder(AiChatAssistant.class)
|
return AiServices.builder(AiChatAssistant.class)
|
||||||
.streamingChatModel(zhipuChatModel)
|
.streamingChatModel(zhipuChatModel)
|
||||||
.systemMessageProvider(chatMemoryId -> promptConfiguration.getSystem())
|
.systemMessageProvider(chatMemoryId -> promptConfiguration.getSystem())
|
||||||
.chatMemoryProvider(memoryId -> MessageWindowChatMemory.withMaxMessages(10))
|
.chatMemoryProvider(memoryId -> MessageWindowChatMemory.withMaxMessages(10))
|
||||||
.contentRetriever(EmbeddingStoreContentRetriever.from(zhiPuLibraryEmbeddingStore))
|
.contentRetriever(
|
||||||
|
EmbeddingStoreContentRetriever.builder()
|
||||||
|
.embeddingStore(zhiPuLibraryEmbeddingStore)
|
||||||
|
.embeddingModel(zhipuEmbeddingModel)
|
||||||
|
.minScore(0.75)
|
||||||
|
.maxResults(5)
|
||||||
|
.build())
|
||||||
.build();
|
.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ import dev.langchain4j.data.document.loader.amazon.s3.AwsCredentials;
|
|||||||
import dev.langchain4j.data.segment.TextSegment;
|
import dev.langchain4j.data.segment.TextSegment;
|
||||||
import dev.langchain4j.model.embedding.EmbeddingModel;
|
import dev.langchain4j.model.embedding.EmbeddingModel;
|
||||||
import dev.langchain4j.store.embedding.EmbeddingStore;
|
import dev.langchain4j.store.embedding.EmbeddingStore;
|
||||||
import dev.langchain4j.store.embedding.EmbeddingStoreIngestor;
|
|
||||||
import dev.langchain4j.store.embedding.pgvector.PgVectorEmbeddingStore;
|
import dev.langchain4j.store.embedding.pgvector.PgVectorEmbeddingStore;
|
||||||
import jakarta.annotation.Resource;
|
import jakarta.annotation.Resource;
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
@@ -75,15 +74,6 @@ public class EmbeddingInitializer {
|
|||||||
.build();
|
.build();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Bean
|
|
||||||
public EmbeddingStoreIngestor zhipuEmbeddingStoreIngestor(
|
|
||||||
EmbeddingStore<TextSegment> zhiPuLibraryEmbeddingStore, EmbeddingModel zhipuEmbeddingModel) {
|
|
||||||
return EmbeddingStoreIngestor.builder()
|
|
||||||
.embeddingModel(zhipuEmbeddingModel)
|
|
||||||
.embeddingStore(zhiPuLibraryEmbeddingStore)
|
|
||||||
.build();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Bean
|
@Bean
|
||||||
public AmazonS3DocumentLoader amazonS3DocumentLoader(MinIoConfig minIoConfig) {
|
public AmazonS3DocumentLoader amazonS3DocumentLoader(MinIoConfig minIoConfig) {
|
||||||
return AmazonS3DocumentLoader.builder()
|
return AmazonS3DocumentLoader.builder()
|
||||||
|
|||||||
@@ -7,9 +7,8 @@ import com.zl.mjga.dto.ai.LlmVm;
|
|||||||
import com.zl.mjga.exception.BusinessException;
|
import com.zl.mjga.exception.BusinessException;
|
||||||
import com.zl.mjga.repository.*;
|
import com.zl.mjga.repository.*;
|
||||||
import com.zl.mjga.service.AiChatService;
|
import com.zl.mjga.service.AiChatService;
|
||||||
import com.zl.mjga.service.EmbeddingService;
|
|
||||||
import com.zl.mjga.service.LlmService;
|
import com.zl.mjga.service.LlmService;
|
||||||
import com.zl.mjga.service.UploadService;
|
import com.zl.mjga.service.RagService;
|
||||||
import dev.langchain4j.service.TokenStream;
|
import dev.langchain4j.service.TokenStream;
|
||||||
import jakarta.validation.Valid;
|
import jakarta.validation.Valid;
|
||||||
import java.security.Principal;
|
import java.security.Principal;
|
||||||
@@ -25,7 +24,6 @@ import org.springframework.http.HttpStatus;
|
|||||||
import org.springframework.http.MediaType;
|
import org.springframework.http.MediaType;
|
||||||
import org.springframework.security.access.prepost.PreAuthorize;
|
import org.springframework.security.access.prepost.PreAuthorize;
|
||||||
import org.springframework.web.bind.annotation.*;
|
import org.springframework.web.bind.annotation.*;
|
||||||
import org.springframework.web.multipart.MultipartFile;
|
|
||||||
import reactor.core.publisher.Flux;
|
import reactor.core.publisher.Flux;
|
||||||
import reactor.core.publisher.Sinks;
|
import reactor.core.publisher.Sinks;
|
||||||
|
|
||||||
@@ -37,14 +35,13 @@ public class AiController {
|
|||||||
|
|
||||||
private final AiChatService aiChatService;
|
private final AiChatService aiChatService;
|
||||||
private final LlmService llmService;
|
private final LlmService llmService;
|
||||||
private final EmbeddingService embeddingService;
|
private final RagService ragService;
|
||||||
private final UserRepository userRepository;
|
private final UserRepository userRepository;
|
||||||
private final DepartmentRepository departmentRepository;
|
private final DepartmentRepository departmentRepository;
|
||||||
private final PositionRepository positionRepository;
|
private final PositionRepository positionRepository;
|
||||||
private final RoleRepository repository;
|
private final RoleRepository repository;
|
||||||
private final PermissionRepository permissionRepository;
|
private final PermissionRepository permissionRepository;
|
||||||
private final RoleRepository roleRepository;
|
private final RoleRepository roleRepository;
|
||||||
private final UploadService uploadService;
|
|
||||||
|
|
||||||
@PostMapping(value = "/action/execute", produces = MediaType.TEXT_EVENT_STREAM_VALUE)
|
@PostMapping(value = "/action/execute", produces = MediaType.TEXT_EVENT_STREAM_VALUE)
|
||||||
public Flux<String> actionExecute(Principal principal, @RequestBody String userMessage) {
|
public Flux<String> actionExecute(Principal principal, @RequestBody String userMessage) {
|
||||||
@@ -112,7 +109,7 @@ public class AiController {
|
|||||||
if (!aiLlmConfig.getEnable()) {
|
if (!aiLlmConfig.getEnable()) {
|
||||||
throw new BusinessException("命令模型未启用,请开启后再试。");
|
throw new BusinessException("命令模型未启用,请开启后再试。");
|
||||||
}
|
}
|
||||||
return embeddingService.searchAction(message);
|
return ragService.searchAction(message);
|
||||||
}
|
}
|
||||||
|
|
||||||
@PreAuthorize("hasAuthority(T(com.zl.mjga.model.urp.EPermission).WRITE_USER_ROLE_PERMISSION)")
|
@PreAuthorize("hasAuthority(T(com.zl.mjga.model.urp.EPermission).WRITE_USER_ROLE_PERMISSION)")
|
||||||
@@ -172,15 +169,4 @@ public class AiController {
|
|||||||
void createNewConversation(Principal principal) {
|
void createNewConversation(Principal principal) {
|
||||||
aiChatService.evictChatMemory(principal.getName());
|
aiChatService.evictChatMemory(principal.getName());
|
||||||
}
|
}
|
||||||
|
|
||||||
@PostMapping(
|
|
||||||
value = "/library/upload",
|
|
||||||
consumes = MediaType.MULTIPART_FORM_DATA_VALUE,
|
|
||||||
produces = MediaType.TEXT_PLAIN_VALUE)
|
|
||||||
public String uploadLibraryFile(@RequestPart("file") MultipartFile multipartFile)
|
|
||||||
throws Exception {
|
|
||||||
String objectName = uploadService.uploadLibraryFile(multipartFile);
|
|
||||||
embeddingService.ingestDocument(objectName);
|
|
||||||
return objectName;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,66 @@
|
|||||||
|
package com.zl.mjga.controller;
|
||||||
|
|
||||||
|
import com.zl.mjga.dto.library.LibraryDocUpdateDto;
|
||||||
|
import com.zl.mjga.dto.library.LibraryUpsertDto;
|
||||||
|
import com.zl.mjga.repository.LibraryDocRepository;
|
||||||
|
import com.zl.mjga.repository.LibraryRepository;
|
||||||
|
import com.zl.mjga.service.RagService;
|
||||||
|
import com.zl.mjga.service.UploadService;
|
||||||
|
import jakarta.validation.Valid;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.jooq.generated.mjga.tables.pojos.Library;
|
||||||
|
import org.jooq.generated.mjga.tables.pojos.LibraryDoc;
|
||||||
|
import org.springframework.http.MediaType;
|
||||||
|
import org.springframework.web.bind.annotation.*;
|
||||||
|
import org.springframework.web.multipart.MultipartFile;
|
||||||
|
|
||||||
|
@RestController
|
||||||
|
@RequestMapping("/library")
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
@Slf4j
|
||||||
|
public class LibraryController {
|
||||||
|
|
||||||
|
private final UploadService uploadService;
|
||||||
|
private final RagService ragService;
|
||||||
|
private final LibraryRepository libraryRepository;
|
||||||
|
private final LibraryDocRepository libraryDocRepository;
|
||||||
|
|
||||||
|
@PostMapping("/upsert")
|
||||||
|
public void upsertLibrary(@RequestBody @Valid LibraryUpsertDto libraryUpsertDto) {
|
||||||
|
Library library = new Library();
|
||||||
|
library.setId(libraryUpsertDto.id());
|
||||||
|
library.setName(libraryUpsertDto.name());
|
||||||
|
libraryRepository.merge(library);
|
||||||
|
}
|
||||||
|
|
||||||
|
@DeleteMapping
|
||||||
|
public void deleteLibrary(@RequestParam Long libraryId) {
|
||||||
|
ragService.deleteLibraryBy(libraryId);
|
||||||
|
}
|
||||||
|
|
||||||
|
@DeleteMapping("/doc")
|
||||||
|
public void deleteLibraryDoc(@RequestParam Long libraryDocId) {
|
||||||
|
ragService.deleteDocBy(libraryDocId);
|
||||||
|
}
|
||||||
|
|
||||||
|
@PutMapping("/doc")
|
||||||
|
public void updateLibraryDoc(@RequestBody @Valid LibraryDocUpdateDto libraryDocUpdateDto) {
|
||||||
|
LibraryDoc libraryDoc = new LibraryDoc();
|
||||||
|
libraryDoc.setId(libraryDocUpdateDto.id());
|
||||||
|
libraryDoc.setEnable(libraryDocUpdateDto.enable());
|
||||||
|
libraryDocRepository.merge(libraryDoc);
|
||||||
|
}
|
||||||
|
|
||||||
|
@PostMapping(
|
||||||
|
value = "/upload",
|
||||||
|
consumes = MediaType.MULTIPART_FORM_DATA_VALUE,
|
||||||
|
produces = MediaType.TEXT_PLAIN_VALUE)
|
||||||
|
public String uploadLibraryDoc(
|
||||||
|
@RequestPart("libraryId") Long libraryId, @RequestPart("file") MultipartFile multipartFile)
|
||||||
|
throws Exception {
|
||||||
|
String objectName = uploadService.uploadLibraryDoc(multipartFile);
|
||||||
|
ragService.ingestDocumentBy(libraryId, objectName, multipartFile.getOriginalFilename());
|
||||||
|
return objectName;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,6 @@
|
|||||||
|
package com.zl.mjga.dto.library;
|
||||||
|
|
||||||
|
import jakarta.validation.constraints.NotEmpty;
|
||||||
|
import jakarta.validation.constraints.NotNull;
|
||||||
|
|
||||||
|
/**
 * Request payload for updating the enable flag of a library document.
 *
 * <p>{@code enable} is constrained with {@code @NotNull}: {@code @NotEmpty} only supports
 * character sequences, collections, maps and arrays, so placing it on a {@code Boolean} makes
 * validation fail with an unexpected-type error instead of checking the value.
 *
 * @param id id of the library document to update; must not be null
 * @param enable new value of the document's enable flag; must not be null
 */
public record LibraryDocUpdateDto(@NotNull Long id, @NotNull Boolean enable) {}
|
||||||
@@ -0,0 +1,5 @@
|
|||||||
|
package com.zl.mjga.dto.library;
|
||||||
|
|
||||||
|
import jakarta.validation.constraints.NotEmpty;
|
||||||
|
|
||||||
|
/**
 * Request payload for creating or updating a library.
 *
 * @param id library id; carries no validation constraint (presumably null when a new library is
 *     created — confirm against the caller)
 * @param name library name; must not be null or empty
 */
public record LibraryUpsertDto(
    Long id,
    @NotEmpty String name) {}
|
||||||
@@ -0,0 +1,14 @@
|
|||||||
|
package com.zl.mjga.repository;
|
||||||
|
|
||||||
|
import org.jooq.Configuration;
|
||||||
|
import org.jooq.generated.mjga.tables.daos.LibraryDocDao;
|
||||||
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
|
import org.springframework.stereotype.Repository;
|
||||||
|
|
||||||
|
@Repository
|
||||||
|
public class LibraryDocRepository extends LibraryDocDao {
|
||||||
|
@Autowired
|
||||||
|
public LibraryDocRepository(Configuration configuration) {
|
||||||
|
super(configuration);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,14 @@
|
|||||||
|
package com.zl.mjga.repository;
|
||||||
|
|
||||||
|
import org.jooq.Configuration;
|
||||||
|
import org.jooq.generated.mjga.tables.daos.LibraryDocSegmentDao;
|
||||||
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
|
import org.springframework.stereotype.Repository;
|
||||||
|
|
||||||
|
@Repository
|
||||||
|
public class LibraryDocSegmentRepository extends LibraryDocSegmentDao {
|
||||||
|
@Autowired
|
||||||
|
public LibraryDocSegmentRepository(Configuration configuration) {
|
||||||
|
super(configuration);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
package com.zl.mjga.repository;
|
||||||
|
|
||||||
|
import org.jooq.Configuration;
|
||||||
|
import org.jooq.generated.mjga.tables.daos.LibraryDao;
|
||||||
|
import org.springframework.beans.factory.annotation.Autowired;
|
||||||
|
import org.springframework.stereotype.Repository;
|
||||||
|
|
||||||
|
@Repository
|
||||||
|
public class LibraryRepository extends LibraryDao {
|
||||||
|
|
||||||
|
@Autowired
|
||||||
|
public LibraryRepository(Configuration configuration) {
|
||||||
|
super(configuration);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,92 +0,0 @@
|
|||||||
package com.zl.mjga.service;
|
|
||||||
|
|
||||||
import static dev.langchain4j.store.embedding.filter.MetadataFilterBuilder.metadataKey;
|
|
||||||
|
|
||||||
import com.zl.mjga.config.ai.ZhiPuEmbeddingModelConfig;
|
|
||||||
import com.zl.mjga.config.minio.MinIoConfig;
|
|
||||||
import com.zl.mjga.model.urp.Actions;
|
|
||||||
import dev.langchain4j.data.document.Document;
|
|
||||||
import dev.langchain4j.data.document.Metadata;
|
|
||||||
import dev.langchain4j.data.document.loader.amazon.s3.AmazonS3DocumentLoader;
|
|
||||||
import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser;
|
|
||||||
import dev.langchain4j.data.embedding.Embedding;
|
|
||||||
import dev.langchain4j.data.segment.TextSegment;
|
|
||||||
import dev.langchain4j.model.embedding.EmbeddingModel;
|
|
||||||
import dev.langchain4j.store.embedding.*;
|
|
||||||
import dev.langchain4j.store.embedding.filter.Filter;
|
|
||||||
import io.minio.errors.*;
|
|
||||||
import jakarta.annotation.PostConstruct;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map;
|
|
||||||
import lombok.RequiredArgsConstructor;
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
|
||||||
import org.springframework.context.annotation.Configuration;
|
|
||||||
import org.springframework.stereotype.Service;
|
|
||||||
|
|
||||||
@Configuration
|
|
||||||
@RequiredArgsConstructor
|
|
||||||
@Service
|
|
||||||
@Slf4j
|
|
||||||
public class EmbeddingService {
|
|
||||||
|
|
||||||
private final EmbeddingModel zhipuEmbeddingModel;
|
|
||||||
|
|
||||||
private final EmbeddingStore<TextSegment> zhiPuEmbeddingStore;
|
|
||||||
|
|
||||||
private final ZhiPuEmbeddingModelConfig zhiPuEmbeddingModelConfig;
|
|
||||||
|
|
||||||
private final AmazonS3DocumentLoader amazonS3DocumentLoader;
|
|
||||||
|
|
||||||
private final EmbeddingStoreIngestor zhiPuEmbeddingStoreIngestor;
|
|
||||||
|
|
||||||
private final MinIoConfig minIoConfig;
|
|
||||||
|
|
||||||
public void ingestDocument(String objectName) {
|
|
||||||
Document document =
|
|
||||||
amazonS3DocumentLoader.loadDocument(
|
|
||||||
minIoConfig.getDefaultBucket(), objectName, new ApacheTikaDocumentParser());
|
|
||||||
IngestionResult ingest = zhiPuEmbeddingStoreIngestor.ingest(document);
|
|
||||||
log.info("Ingest document finished {}", ingest);
|
|
||||||
}
|
|
||||||
|
|
||||||
public Map<String, String> searchAction(String message) {
|
|
||||||
Map<String, String> result = new HashMap<>();
|
|
||||||
EmbeddingSearchRequest embeddingSearchRequest =
|
|
||||||
EmbeddingSearchRequest.builder()
|
|
||||||
.queryEmbedding(zhipuEmbeddingModel.embed(message).content())
|
|
||||||
.minScore(0.89)
|
|
||||||
.build();
|
|
||||||
EmbeddingSearchResult<TextSegment> embeddingSearchResult =
|
|
||||||
zhiPuEmbeddingStore.search(embeddingSearchRequest);
|
|
||||||
if (!embeddingSearchResult.matches().isEmpty()) {
|
|
||||||
Metadata metadata = embeddingSearchResult.matches().getFirst().embedded().metadata();
|
|
||||||
result.put(Actions.INDEX_KEY, metadata.getString(Actions.INDEX_KEY));
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
@PostConstruct
|
|
||||||
public void initActionIndex() {
|
|
||||||
if (!zhiPuEmbeddingModelConfig.getEnable()) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
for (Actions action : Actions.values()) {
|
|
||||||
Embedding queryEmbedding = zhipuEmbeddingModel.embed(action.getContent()).content();
|
|
||||||
Filter createUserFilter = metadataKey(Actions.INDEX_KEY).isEqualTo(action.getCode());
|
|
||||||
EmbeddingSearchRequest embeddingSearchRequest =
|
|
||||||
EmbeddingSearchRequest.builder()
|
|
||||||
.queryEmbedding(queryEmbedding)
|
|
||||||
.filter(createUserFilter)
|
|
||||||
.build();
|
|
||||||
EmbeddingSearchResult<TextSegment> embeddingSearchResult =
|
|
||||||
zhiPuEmbeddingStore.search(embeddingSearchRequest);
|
|
||||||
if (embeddingSearchResult.matches().isEmpty()) {
|
|
||||||
TextSegment segment =
|
|
||||||
TextSegment.from(
|
|
||||||
action.getContent(), Metadata.metadata(Actions.INDEX_KEY, action.getCode()));
|
|
||||||
Embedding embedding = zhipuEmbeddingModel.embed(segment).content();
|
|
||||||
zhiPuEmbeddingStore.add(embedding, segment);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
189
backend/src/main/java/com/zl/mjga/service/RagService.java
Normal file
189
backend/src/main/java/com/zl/mjga/service/RagService.java
Normal file
@@ -0,0 +1,189 @@
|
|||||||
|
package com.zl.mjga.service;
|
||||||
|
|
||||||
|
import static dev.langchain4j.store.embedding.filter.MetadataFilterBuilder.metadataKey;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.core.JsonProcessingException;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import com.zl.mjga.config.ai.ZhiPuEmbeddingModelConfig;
|
||||||
|
import com.zl.mjga.config.minio.MinIoConfig;
|
||||||
|
import com.zl.mjga.model.urp.Actions;
|
||||||
|
import com.zl.mjga.repository.LibraryDocRepository;
|
||||||
|
import com.zl.mjga.repository.LibraryRepository;
|
||||||
|
import dev.langchain4j.data.document.Document;
|
||||||
|
import dev.langchain4j.data.document.Metadata;
|
||||||
|
import dev.langchain4j.data.document.loader.amazon.s3.AmazonS3DocumentLoader;
|
||||||
|
import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser;
|
||||||
|
import dev.langchain4j.data.document.splitter.DocumentByParagraphSplitter;
|
||||||
|
import dev.langchain4j.data.embedding.Embedding;
|
||||||
|
import dev.langchain4j.data.segment.TextSegment;
|
||||||
|
import dev.langchain4j.model.embedding.EmbeddingModel;
|
||||||
|
import dev.langchain4j.model.output.Response;
|
||||||
|
import dev.langchain4j.store.embedding.*;
|
||||||
|
import dev.langchain4j.store.embedding.filter.Filter;
|
||||||
|
import jakarta.annotation.PostConstruct;
|
||||||
|
import java.time.Instant;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.apache.commons.collections4.CollectionUtils;
|
||||||
|
import org.apache.commons.lang3.RandomStringUtils;
|
||||||
|
import org.jooq.JSON;
|
||||||
|
import org.jooq.generated.mjga.tables.daos.LibraryDocSegmentDao;
|
||||||
|
import org.jooq.generated.mjga.tables.pojos.LibraryDoc;
|
||||||
|
import org.jooq.generated.mjga.tables.pojos.LibraryDocSegment;
|
||||||
|
import org.springframework.context.annotation.Configuration;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
|
|
||||||
|
@Configuration
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
@Service
|
||||||
|
@Slf4j
|
||||||
|
public class RagService {
|
||||||
|
|
||||||
|
private final EmbeddingModel zhipuEmbeddingModel;
|
||||||
|
|
||||||
|
private final EmbeddingStore<TextSegment> zhiPuEmbeddingStore;
|
||||||
|
|
||||||
|
private final ZhiPuEmbeddingModelConfig zhiPuEmbeddingModelConfig;
|
||||||
|
|
||||||
|
private final AmazonS3DocumentLoader amazonS3DocumentLoader;
|
||||||
|
|
||||||
|
private final MinIoConfig minIoConfig;
|
||||||
|
|
||||||
|
private final LibraryRepository libraryRepository;
|
||||||
|
|
||||||
|
private final LibraryDocRepository libraryDocRepository;
|
||||||
|
|
||||||
|
private final LibraryDocSegmentDao libraryDocSegmentDao;
|
||||||
|
|
||||||
|
public void deleteLibraryBy(Long libraryId) {
|
||||||
|
List<LibraryDoc> libraryDocs = libraryDocRepository.fetchByLibId(libraryId);
|
||||||
|
List<Long> docIds = libraryDocs.stream().map(LibraryDoc::getId).toList();
|
||||||
|
for (Long docId : docIds) {
|
||||||
|
deleteDocBy(docId);
|
||||||
|
}
|
||||||
|
libraryRepository.deleteById(libraryId);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void deleteDocBy(Long docId) {
|
||||||
|
List<LibraryDocSegment> libraryDocSegments = libraryDocSegmentDao.fetchByDocId(docId);
|
||||||
|
List<String> embeddingIdList =
|
||||||
|
libraryDocSegments.stream().map(LibraryDocSegment::getEmbeddingId).toList();
|
||||||
|
if (CollectionUtils.isNotEmpty(embeddingIdList)) {
|
||||||
|
zhiPuEmbeddingStore.removeAll(embeddingIdList);
|
||||||
|
}
|
||||||
|
libraryDocRepository.deleteById(docId);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Transactional(rollbackFor = Throwable.class)
|
||||||
|
public void ingestDocumentBy(Long libraryId, String objectName, String originalName)
|
||||||
|
throws Exception {
|
||||||
|
Document document =
|
||||||
|
amazonS3DocumentLoader.loadDocument(
|
||||||
|
minIoConfig.getDefaultBucket(), objectName, new ApacheTikaDocumentParser());
|
||||||
|
ArrayList<String> embeddingIds = new ArrayList<>();
|
||||||
|
try {
|
||||||
|
Long libraryDocId = createLibraryDoc(objectName, originalName, document.metadata().toMap());
|
||||||
|
DocumentByParagraphSplitter documentByParagraphSplitter =
|
||||||
|
new DocumentByParagraphSplitter(1000, 200);
|
||||||
|
documentByParagraphSplitter
|
||||||
|
.split(document)
|
||||||
|
.forEach(
|
||||||
|
textSegment -> {
|
||||||
|
Metadata metadata = textSegment.metadata();
|
||||||
|
metadata.put("libraryId", libraryId);
|
||||||
|
Response<Embedding> embed = zhipuEmbeddingModel.embed(textSegment);
|
||||||
|
Integer tokenUsage = embed.tokenUsage().totalTokenCount();
|
||||||
|
Embedding vector = embed.content();
|
||||||
|
String embeddingId = zhiPuEmbeddingStore.add(vector, textSegment);
|
||||||
|
embeddingIds.add(embeddingId);
|
||||||
|
createLibraryDocSegment(textSegment, libraryDocId, tokenUsage, embeddingId);
|
||||||
|
});
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error(
|
||||||
|
"文档采集失败。libraryId {} objectName {} originalName {}",
|
||||||
|
libraryId,
|
||||||
|
objectName,
|
||||||
|
originalName,
|
||||||
|
e);
|
||||||
|
if (CollectionUtils.isNotEmpty(embeddingIds)) {
|
||||||
|
zhiPuEmbeddingStore.removeAll(embeddingIds);
|
||||||
|
}
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void createLibraryDocSegment(
|
||||||
|
TextSegment textSegment, Long libraryDocId, Integer tokenUsage, String embeddingId) {
|
||||||
|
LibraryDocSegment libraryDocSegment = new LibraryDocSegment();
|
||||||
|
libraryDocSegment.setDocId(libraryDocId);
|
||||||
|
libraryDocSegment.setContent(textSegment.text());
|
||||||
|
libraryDocSegment.setTokenUsage(tokenUsage);
|
||||||
|
libraryDocSegment.setEmbeddingId(embeddingId);
|
||||||
|
libraryDocSegmentDao.insert();
|
||||||
|
}
|
||||||
|
|
||||||
|
private Long createLibraryDoc(String objectName, String originalName, Map meta)
|
||||||
|
throws JsonProcessingException {
|
||||||
|
String identify =
|
||||||
|
String.format(
|
||||||
|
"%d%s_%s",
|
||||||
|
Instant.now().toEpochMilli(),
|
||||||
|
RandomStringUtils.insecure().nextAlphabetic(6),
|
||||||
|
originalName);
|
||||||
|
LibraryDoc libraryDoc = new LibraryDoc();
|
||||||
|
ObjectMapper objectMapper = new ObjectMapper();
|
||||||
|
String metaJson = objectMapper.writeValueAsString(meta);
|
||||||
|
libraryDoc.setMeta(JSON.valueOf(metaJson));
|
||||||
|
libraryDoc.setPath(objectName);
|
||||||
|
libraryDoc.setName(originalName);
|
||||||
|
libraryDoc.setIdentify(identify);
|
||||||
|
libraryDocRepository.insert(libraryDoc);
|
||||||
|
return libraryDocRepository.fetchOneByIdentify(identify).getId();
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<String, String> searchAction(String message) {
|
||||||
|
Map<String, String> result = new HashMap<>();
|
||||||
|
EmbeddingSearchRequest embeddingSearchRequest =
|
||||||
|
EmbeddingSearchRequest.builder()
|
||||||
|
.queryEmbedding(zhipuEmbeddingModel.embed(message).content())
|
||||||
|
.minScore(0.89)
|
||||||
|
.build();
|
||||||
|
EmbeddingSearchResult<TextSegment> embeddingSearchResult =
|
||||||
|
zhiPuEmbeddingStore.search(embeddingSearchRequest);
|
||||||
|
if (!embeddingSearchResult.matches().isEmpty()) {
|
||||||
|
Metadata metadata = embeddingSearchResult.matches().getFirst().embedded().metadata();
|
||||||
|
result.put(Actions.INDEX_KEY, metadata.getString(Actions.INDEX_KEY));
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
@PostConstruct
|
||||||
|
public void initActionIndex() {
|
||||||
|
if (!zhiPuEmbeddingModelConfig.getEnable()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
for (Actions action : Actions.values()) {
|
||||||
|
Embedding queryEmbedding = zhipuEmbeddingModel.embed(action.getContent()).content();
|
||||||
|
Filter createUserFilter = metadataKey(Actions.INDEX_KEY).isEqualTo(action.getCode());
|
||||||
|
EmbeddingSearchRequest embeddingSearchRequest =
|
||||||
|
EmbeddingSearchRequest.builder()
|
||||||
|
.queryEmbedding(queryEmbedding)
|
||||||
|
.filter(createUserFilter)
|
||||||
|
.build();
|
||||||
|
EmbeddingSearchResult<TextSegment> embeddingSearchResult =
|
||||||
|
zhiPuEmbeddingStore.search(embeddingSearchRequest);
|
||||||
|
if (embeddingSearchResult.matches().isEmpty()) {
|
||||||
|
TextSegment segment =
|
||||||
|
TextSegment.from(
|
||||||
|
action.getContent(), Metadata.metadata(Actions.INDEX_KEY, action.getCode()));
|
||||||
|
Embedding embedding = zhipuEmbeddingModel.embed(segment).content();
|
||||||
|
zhiPuEmbeddingStore.add(embedding, segment);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -58,7 +58,7 @@ public class UploadService {
|
|||||||
return objectName;
|
return objectName;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String uploadLibraryFile(MultipartFile multipartFile) throws Exception {
|
public String uploadLibraryDoc(MultipartFile multipartFile) throws Exception {
|
||||||
String originalFilename = multipartFile.getOriginalFilename();
|
String originalFilename = multipartFile.getOriginalFilename();
|
||||||
if (StringUtils.isEmpty(originalFilename)) {
|
if (StringUtils.isEmpty(originalFilename)) {
|
||||||
throw new BusinessException("文件名不能为空");
|
throw new BusinessException("文件名不能为空");
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ CREATE TABLE mjga.user (
|
|||||||
id BIGSERIAL PRIMARY KEY,
|
id BIGSERIAL PRIMARY KEY,
|
||||||
username VARCHAR NOT NULL UNIQUE,
|
username VARCHAR NOT NULL UNIQUE,
|
||||||
avatar VARCHAR,
|
avatar VARCHAR,
|
||||||
create_time TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
create_time TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
password VARCHAR NOT NULL,
|
password VARCHAR NOT NULL,
|
||||||
enable BOOLEAN NOT NULL DEFAULT TRUE
|
enable BOOLEAN NOT NULL DEFAULT TRUE
|
||||||
);
|
);
|
||||||
@@ -39,7 +39,7 @@ CREATE TABLE mjga.user_role_map (
|
|||||||
|
|
||||||
CREATE TABLE mjga.department (
|
CREATE TABLE mjga.department (
|
||||||
id BIGSERIAL PRIMARY KEY,
|
id BIGSERIAL PRIMARY KEY,
|
||||||
name VARCHAR(255) NOT NULL UNIQUE,
|
name VARCHAR NOT NULL UNIQUE,
|
||||||
parent_id BIGINT,
|
parent_id BIGINT,
|
||||||
FOREIGN KEY (parent_id)
|
FOREIGN KEY (parent_id)
|
||||||
REFERENCES mjga.department(id)
|
REFERENCES mjga.department(id)
|
||||||
@@ -56,7 +56,7 @@ CREATE TABLE mjga.user_department_map (
|
|||||||
|
|
||||||
CREATE TABLE mjga.position (
|
CREATE TABLE mjga.position (
|
||||||
id BIGSERIAL PRIMARY KEY,
|
id BIGSERIAL PRIMARY KEY,
|
||||||
name VARCHAR(255) NOT NULL UNIQUE
|
name VARCHAR NOT NULL UNIQUE
|
||||||
);
|
);
|
||||||
|
|
||||||
CREATE TABLE mjga.user_position_map (
|
CREATE TABLE mjga.user_position_map (
|
||||||
@@ -80,12 +80,12 @@ CREATE TYPE "llm_type_enum" AS ENUM (
|
|||||||
|
|
||||||
CREATE TABLE mjga.ai_llm_config (
|
CREATE TABLE mjga.ai_llm_config (
|
||||||
id BIGSERIAL NOT NULL UNIQUE,
|
id BIGSERIAL NOT NULL UNIQUE,
|
||||||
name VARCHAR(255) NOT NULL UNIQUE,
|
name VARCHAR NOT NULL UNIQUE,
|
||||||
code mjga.llm_code_enum NOT NULL UNIQUE,
|
code mjga.llm_code_enum NOT NULL UNIQUE,
|
||||||
model_name VARCHAR(255) NOT NULL,
|
model_name VARCHAR NOT NULL,
|
||||||
type LLM_TYPE_ENUM NOT NULL,
|
type LLM_TYPE_ENUM NOT NULL,
|
||||||
api_key VARCHAR(255) NOT NULL,
|
api_key VARCHAR NOT NULL,
|
||||||
url VARCHAR(255) NOT NULL,
|
url VARCHAR NOT NULL,
|
||||||
enable BOOLEAN NOT NULL DEFAULT true,
|
enable BOOLEAN NOT NULL DEFAULT true,
|
||||||
priority SMALLINT NOT NULL DEFAULT 0,
|
priority SMALLINT NOT NULL DEFAULT 0,
|
||||||
PRIMARY KEY(id)
|
PRIMARY KEY(id)
|
||||||
|
|||||||
@@ -0,0 +1,28 @@
|
|||||||
|
CREATE TABLE mjga.library (
|
||||||
|
id BIGSERIAL PRIMARY KEY,
|
||||||
|
name VARCHAR NOT NULL UNIQUE,
|
||||||
|
data_count INTEGER NOT NULL DEFAULT 0,
|
||||||
|
create_time TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE mjga.library_doc (
|
||||||
|
id BIGSERIAL PRIMARY KEY,
|
||||||
|
lib_id BIGINT NOT NULL,
|
||||||
|
name VARCHAR NOT NULL,
|
||||||
|
identify VARCHAR NOT NULL UNIQUE,
|
||||||
|
path VARCHAR NOT NULL,
|
||||||
|
meta JSON NOT NULL,
|
||||||
|
enable BOOLEAN NOT NULL DEFAULT true,
|
||||||
|
create_time TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
update_time TIMESTAMPTZ,
|
||||||
|
FOREIGN KEY (lib_id) REFERENCES mjga.library (id) ON DELETE CASCADE
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE mjga.library_doc_segment (
|
||||||
|
id BIGSERIAL PRIMARY KEY,
|
||||||
|
doc_id BIGINT NOT NULL,
|
||||||
|
embedding_id VARCHAR NOT NULL UNIQUE,
|
||||||
|
content TEXT,
|
||||||
|
token_usage INTEGER NOT NULL DEFAULT 0,
|
||||||
|
FOREIGN KEY (doc_id) REFERENCES mjga.library_doc (id) ON DELETE CASCADE
|
||||||
|
);
|
||||||
@@ -4,7 +4,7 @@ CREATE TABLE mjga.user (
|
|||||||
id BIGSERIAL PRIMARY KEY,
|
id BIGSERIAL PRIMARY KEY,
|
||||||
username VARCHAR NOT NULL UNIQUE,
|
username VARCHAR NOT NULL UNIQUE,
|
||||||
avatar VARCHAR,
|
avatar VARCHAR,
|
||||||
create_time TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
create_time TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
password VARCHAR NOT NULL,
|
password VARCHAR NOT NULL,
|
||||||
enable BOOLEAN NOT NULL DEFAULT TRUE
|
enable BOOLEAN NOT NULL DEFAULT TRUE
|
||||||
);
|
);
|
||||||
@@ -39,7 +39,7 @@ CREATE TABLE mjga.user_role_map (
|
|||||||
|
|
||||||
CREATE TABLE mjga.department (
|
CREATE TABLE mjga.department (
|
||||||
id BIGSERIAL PRIMARY KEY,
|
id BIGSERIAL PRIMARY KEY,
|
||||||
name VARCHAR(255) NOT NULL UNIQUE,
|
name VARCHAR NOT NULL UNIQUE,
|
||||||
parent_id BIGINT,
|
parent_id BIGINT,
|
||||||
FOREIGN KEY (parent_id)
|
FOREIGN KEY (parent_id)
|
||||||
REFERENCES mjga.department(id)
|
REFERENCES mjga.department(id)
|
||||||
@@ -56,7 +56,7 @@ CREATE TABLE mjga.user_department_map (
|
|||||||
|
|
||||||
CREATE TABLE mjga.position (
|
CREATE TABLE mjga.position (
|
||||||
id BIGSERIAL PRIMARY KEY,
|
id BIGSERIAL PRIMARY KEY,
|
||||||
name VARCHAR(255) NOT NULL UNIQUE
|
name VARCHAR NOT NULL UNIQUE
|
||||||
);
|
);
|
||||||
|
|
||||||
CREATE TABLE mjga.user_position_map (
|
CREATE TABLE mjga.user_position_map (
|
||||||
@@ -80,12 +80,12 @@ CREATE TYPE "llm_type_enum" AS ENUM (
|
|||||||
|
|
||||||
CREATE TABLE mjga.ai_llm_config (
|
CREATE TABLE mjga.ai_llm_config (
|
||||||
id BIGSERIAL NOT NULL UNIQUE,
|
id BIGSERIAL NOT NULL UNIQUE,
|
||||||
name VARCHAR(255) NOT NULL UNIQUE,
|
name VARCHAR NOT NULL UNIQUE,
|
||||||
code mjga.llm_code_enum NOT NULL UNIQUE,
|
code mjga.llm_code_enum NOT NULL UNIQUE,
|
||||||
model_name VARCHAR(255) NOT NULL,
|
model_name VARCHAR NOT NULL,
|
||||||
type LLM_TYPE_ENUM NOT NULL,
|
type LLM_TYPE_ENUM NOT NULL,
|
||||||
api_key VARCHAR(255) NOT NULL,
|
api_key VARCHAR NOT NULL,
|
||||||
url VARCHAR(255) NOT NULL,
|
url VARCHAR NOT NULL,
|
||||||
enable BOOLEAN NOT NULL DEFAULT true,
|
enable BOOLEAN NOT NULL DEFAULT true,
|
||||||
priority SMALLINT NOT NULL DEFAULT 0,
|
priority SMALLINT NOT NULL DEFAULT 0,
|
||||||
PRIMARY KEY(id)
|
PRIMARY KEY(id)
|
||||||
|
|||||||
@@ -0,0 +1,28 @@
|
|||||||
|
CREATE TABLE mjga.library (
|
||||||
|
id BIGSERIAL PRIMARY KEY,
|
||||||
|
name VARCHAR NOT NULL UNIQUE,
|
||||||
|
data_count INTEGER NOT NULL DEFAULT 0,
|
||||||
|
create_time TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE mjga.library_doc (
|
||||||
|
id BIGSERIAL PRIMARY KEY,
|
||||||
|
lib_id BIGINT NOT NULL,
|
||||||
|
name VARCHAR NOT NULL,
|
||||||
|
identify VARCHAR NOT NULL UNIQUE,
|
||||||
|
path VARCHAR NOT NULL,
|
||||||
|
meta JSON NOT NULL,
|
||||||
|
enable BOOLEAN NOT NULL DEFAULT true,
|
||||||
|
create_time TIMESTAMPTZ NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||||
|
update_time TIMESTAMPTZ,
|
||||||
|
FOREIGN KEY (lib_id) REFERENCES mjga.library (id) ON DELETE CASCADE
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE mjga.library_doc_segment (
|
||||||
|
id BIGSERIAL PRIMARY KEY,
|
||||||
|
doc_id BIGINT NOT NULL,
|
||||||
|
embedding_id VARCHAR NOT NULL UNIQUE,
|
||||||
|
content TEXT,
|
||||||
|
token_usage INTEGER NOT NULL DEFAULT 0,
|
||||||
|
FOREIGN KEY (doc_id) REFERENCES mjga.library_doc (id) ON DELETE CASCADE
|
||||||
|
);
|
||||||
Reference in New Issue
Block a user