feat(知识库): 增加知识库模块

This commit is contained in:
ageer
2025-03-02 11:19:29 +08:00
parent 04f579d033
commit 1385b165c9
1421 changed files with 166583 additions and 64636 deletions

View File

@@ -0,0 +1,37 @@
package org.ruoyi.knowledge.chain.loader;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.ruoyi.knowledge.chain.split.TextSplitter;
import org.springframework.stereotype.Component;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.List;
@Component
@AllArgsConstructor
@Slf4j
public class CodeFileLoader implements ResourceLoader{
private final TextSplitter textSplitter;
@Override
public String getContent(InputStream inputStream) {
StringBuffer stringBuffer = new StringBuffer();
try (InputStreamReader reader = new InputStreamReader(inputStream);
BufferedReader bufferedReader = new BufferedReader(reader)){
String line;
while ((line = bufferedReader.readLine()) != null) {
stringBuffer.append(line).append("\n");
}
} catch (IOException e) {
e.printStackTrace();
}
return stringBuffer.toString();
}
@Override
public List<String> getChunkList(String content){
return textSplitter.split(content);
}
}

View File

@@ -0,0 +1,16 @@
package org.ruoyi.knowledge.chain.loader;
import java.io.InputStream;
import java.util.List;
/**
 * Placeholder {@link ResourceLoader}, presumably intended for CSV files (per the class name).
 *
 * <p>Not implemented yet: both methods return {@code null}, so callers must null-check.
 */
public class CsvFileLoader implements ResourceLoader{
/**
 * Not implemented.
 *
 * @param inputStream ignored
 * @return always {@code null}
 */
@Override
public String getContent(InputStream inputStream) {
return null;
}
/**
 * Not implemented.
 *
 * @param content ignored
 * @return always {@code null}
 */
@Override
public List<String> getChunkList(String content) {
return null;
}
}

View File

@@ -0,0 +1,16 @@
package org.ruoyi.knowledge.chain.loader;
import java.io.InputStream;
import java.util.List;
/**
 * Placeholder {@link ResourceLoader}, presumably intended for folders (per the class name).
 *
 * <p>Not implemented yet: both methods return {@code null}, so callers must null-check.
 */
public class FolderLoader implements ResourceLoader{
/**
 * Not implemented.
 *
 * @param inputStream ignored
 * @return always {@code null}
 */
@Override
public String getContent(InputStream inputStream) {
return null;
}
/**
 * Not implemented.
 *
 * @param content ignored
 * @return always {@code null}
 */
@Override
public List<String> getChunkList(String content) {
return null;
}
}

View File

@@ -0,0 +1,16 @@
package org.ruoyi.knowledge.chain.loader;
import java.io.InputStream;
import java.util.List;
/**
 * Placeholder {@link ResourceLoader}, presumably intended for GitHub sources (per the class name).
 *
 * <p>Not implemented yet: both methods return {@code null}, so callers must null-check.
 */
public class GithubLoader implements ResourceLoader{
/**
 * Not implemented.
 *
 * @param inputStream ignored
 * @return always {@code null}
 */
@Override
public String getContent(InputStream inputStream) {
return null;
}
/**
 * Not implemented.
 *
 * @param content ignored
 * @return always {@code null}
 */
@Override
public List<String> getChunkList(String content) {
return null;
}
}

View File

@@ -0,0 +1,16 @@
package org.ruoyi.knowledge.chain.loader;
import java.io.InputStream;
import java.util.List;
/**
 * Placeholder {@link ResourceLoader}, presumably intended for JSON files (per the class name).
 *
 * <p>Not implemented yet: both methods return {@code null}, so callers must null-check.
 */
public class JsonFileLoader implements ResourceLoader{
/**
 * Not implemented.
 *
 * @param inputStream ignored
 * @return always {@code null}
 */
@Override
public String getContent(InputStream inputStream) {
return null;
}
/**
 * Not implemented.
 *
 * @param content ignored
 * @return always {@code null}
 */
@Override
public List<String> getChunkList(String content) {
return null;
}
}

View File

@@ -0,0 +1,37 @@
package org.ruoyi.knowledge.chain.loader;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.ruoyi.knowledge.chain.split.TextSplitter;
import org.springframework.stereotype.Component;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.List;
@Component
@AllArgsConstructor
@Slf4j
public class MarkDownFileLoader implements ResourceLoader{
private final TextSplitter textSplitter;
@Override
public String getContent(InputStream inputStream) {
StringBuffer stringBuffer = new StringBuffer();
try (InputStreamReader reader = new InputStreamReader(inputStream);
BufferedReader bufferedReader = new BufferedReader(reader)){
String line;
while ((line = bufferedReader.readLine()) != null) {
stringBuffer.append(line).append("\n");
}
} catch (IOException e) {
e.printStackTrace();
}
return stringBuffer.toString();
}
@Override
public List<String> getChunkList(String content){
return textSplitter.split(content);
}
}

View File

@@ -0,0 +1,34 @@
package org.ruoyi.knowledge.chain.loader;
import lombok.AllArgsConstructor;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.ruoyi.knowledge.chain.split.TextSplitter;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
/**
 * {@link ResourceLoader} for PDF files: extracts the text with PDFBox and
 * delegates chunking to the injected {@link TextSplitter}.
 */
@Component
@AllArgsConstructor
public class PdfFileLoader implements ResourceLoader {
    private final TextSplitter characterTextSplitter;

    /**
     * Extracts the text of the PDF contained in the stream.
     *
     * @param inputStream the raw PDF bytes
     * @return the text produced by {@link PDFTextStripper}
     * @throws RuntimeException wrapping the {@link IOException} if the PDF cannot be loaded or read
     */
    @Override
    public String getContent(InputStream inputStream) {
        // try-with-resources: the document must be closed to release the buffers PDFBox holds.
        try (PDDocument document = PDDocument.load(inputStream)) {
            PDFTextStripper textStripper = new PDFTextStripper();
            return textStripper.getText(document);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Splits the content into chunks using the configured splitter.
     *
     * @param content text previously returned by {@link #getContent(InputStream)}
     * @return whatever the splitter returns for this content
     */
    @Override
    public List<String> getChunkList(String content) {
        return characterTextSplitter.split(content);
    }
}

View File

@@ -0,0 +1,12 @@
package org.ruoyi.knowledge.chain.loader;
import java.io.InputStream;
import java.util.List;
/**
 * Resource loading: turns an uploaded resource into text and then into chunks.
 */
public interface ResourceLoader {
/**
 * Reads the textual content of a resource.
 *
 * @param inputStream the raw resource bytes
 * @return the extracted text (some placeholder implementations return {@code null})
 */
String getContent(InputStream inputStream);
/**
 * Splits previously extracted text into chunks.
 *
 * @param content the text to split
 * @return the list of chunks (some placeholder implementations return {@code null})
 */
List<String> getChunkList(String content);
}

View File

@@ -0,0 +1,33 @@
package org.ruoyi.knowledge.chain.loader;
import lombok.AllArgsConstructor;
import org.ruoyi.knowledge.chain.split.CharacterTextSplitter;
import org.ruoyi.knowledge.chain.split.CodeTextSplitter;
import org.ruoyi.knowledge.chain.split.MarkdownTextSplitter;
import org.ruoyi.knowledge.chain.split.TokenTextSplitter;
import org.ruoyi.knowledge.constant.FileType;
import org.springframework.stereotype.Component;
/**
 * Picks the {@link ResourceLoader} that matches a file type, wiring each loader
 * with the splitter used for that kind of content.
 */
@AllArgsConstructor
@Component
public class ResourceLoaderFactory {
    private final CharacterTextSplitter characterTextSplitter;
    private final CodeTextSplitter codeTextSplitter;
    private final MarkdownTextSplitter markdownTextSplitter;
    private final TokenTextSplitter tokenTextSplitter;

    /**
     * Creates a new loader for the given file type.
     *
     * <p>The type checks run in this order: text, Word, PDF, Markdown, code. Anything that
     * matches none of them is handled as plain text.
     *
     * @param fileType the file type as understood by {@link FileType}
     * @return a freshly created loader, never {@code null}
     */
    public ResourceLoader getLoaderByFileType(String fileType) {
        if (FileType.isTextFile(fileType)) {
            return new TextFileLoader(characterTextSplitter);
        }
        if (FileType.isWord(fileType)) {
            return new WordLoader(characterTextSplitter);
        }
        if (FileType.isPdf(fileType)) {
            return new PdfFileLoader(characterTextSplitter);
        }
        if (FileType.isMdFile(fileType)) {
            return new MarkDownFileLoader(markdownTextSplitter);
        }
        if (FileType.isCodeFile(fileType)) {
            return new CodeFileLoader(codeTextSplitter);
        }
        // Unknown types fall back to the plain-text loader.
        return new TextFileLoader(characterTextSplitter);
    }
}

View File

@@ -0,0 +1,37 @@
package org.ruoyi.knowledge.chain.loader;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.ruoyi.knowledge.chain.split.TextSplitter;
import org.springframework.stereotype.Component;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.List;
@Component
@AllArgsConstructor
@Slf4j
public class TextFileLoader implements ResourceLoader{
private final TextSplitter textSplitter;
@Override
public String getContent(InputStream inputStream) {
StringBuffer stringBuffer = new StringBuffer();
try (InputStreamReader reader = new InputStreamReader(inputStream, "UTF-8");
BufferedReader bufferedReader = new BufferedReader(reader)){
String line;
while ((line = bufferedReader.readLine()) != null) {
stringBuffer.append(line).append("\n");
}
} catch (IOException e) {
e.printStackTrace();
}
return stringBuffer.toString();
}
@Override
public List<String> getChunkList(String content){
return textSplitter.split(content);
}
}

View File

@@ -0,0 +1,37 @@
package org.ruoyi.knowledge.chain.loader;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.ruoyi.knowledge.chain.split.TextSplitter;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;
/**
 * {@link ResourceLoader} for Word (OOXML) documents: extracts the text with Apache POI and
 * delegates chunking to the injected {@link TextSplitter}.
 */
@Component
@AllArgsConstructor
@Slf4j
public class WordLoader implements ResourceLoader {
    private final TextSplitter textSplitter;

    /**
     * Extracts the text of the Word document contained in the stream.
     *
     * @param inputStream the raw document bytes
     * @return the text produced by {@link XWPFWordExtractor}
     * @throws RuntimeException wrapping the {@link IOException} if the document cannot be read
     */
    @Override
    public String getContent(InputStream inputStream) {
        // try-with-resources: the document is closed once the text has been extracted,
        // instead of being left open until garbage collection.
        try (XWPFDocument document = new XWPFDocument(inputStream)) {
            XWPFWordExtractor extractor = new XWPFWordExtractor(document);
            return extractor.getText();
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Splits the content into chunks using the configured splitter.
     *
     * @param content text previously returned by {@link #getContent(InputStream)}
     * @return whatever the splitter returns for this content
     */
    @Override
    public List<String> getChunkList(String content) {
        return textSplitter.split(content);
    }
}

View File

@@ -0,0 +1,16 @@
package org.ruoyi.knowledge.chain.retrieve;
import lombok.Data;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
/**
 * Retrieval settings bound from the {@code chain.limits} property.
 */
@Data
@Component
public class PromptRetrieverProperties {
/**
 * Number of entries to retrieve from the knowledge base; limits should be greater than num.
 */
@Value("${chain.limits}")
private int limits;
}

View File

@@ -0,0 +1,51 @@
package org.ruoyi.knowledge.chain.split;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.annotation.Primary;
import org.springframework.stereotype.Component;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Default {@link TextSplitter}: splits on a configured separator when the text contains it,
 * otherwise cuts the text into fixed-size windows that overlap their neighbours.
 */
@Component
@AllArgsConstructor
@Slf4j
@Primary
public class CharacterTextSplitter implements TextSplitter {
    private final SplitterProperties splitterProperties;

    /**
     * Splits the content into chunks.
     *
     * <ul>
     *   <li>If a non-empty separator is configured and occurs in the content, the content is
     *       split on that separator, taken literally.</li>
     *   <li>Otherwise window {@code i} covers
     *       {@code [i*size - overlay, (i+1)*size + overlay)}, clamped to the content bounds.</li>
     * </ul>
     *
     * @param content the text to split; {@code null} or empty yields an empty list
     * @return a new mutable list of chunks, never {@code null}
     * @throws IllegalStateException if window splitting is needed but the configured size is not positive
     */
    @Override
    public List<String> split(String content) {
        List<String> chunkList = new ArrayList<>();
        if (content == null || content.isEmpty()) {
            return chunkList;
        }

        String separator = splitterProperties.getEndspliter();
        // An unset or empty separator must not be used: contains("") is always true and
        // split("") would cut the text into single characters.
        if (separator != null && !separator.isEmpty() && content.contains(separator)) {
            // contains() matches literally, so split() must too; quote the separator so that
            // characters such as '.' or '|' are not interpreted as regex syntax.
            String[] chunks = content.split(java.util.regex.Pattern.quote(separator));
            chunkList.addAll(Arrays.asList(chunks));
            return chunkList;
        }

        int size = splitterProperties.getSize();
        if (size <= 0) {
            // A non-positive size would never advance the window and loop forever.
            throw new IllegalStateException("chain.split.chunk.size must be positive, but was " + size);
        }
        // A negative overlap would produce windows with begin > end; treat it as no overlap.
        int overlay = Math.max(0, splitterProperties.getOverlay());
        int len = content.length();

        int start = 0;
        while (start < len) {
            int begin = Math.max(0, start - overlay);
            // long arithmetic so very large size/overlap values cannot overflow into a negative index
            int end = (int) Math.min((long) len, (long) start + size + overlay);
            chunkList.add(content.substring(begin, end));
            if (size >= len - start) {
                break;
            }
            start += size;
        }
        return chunkList;
    }
}

View File

@@ -0,0 +1,17 @@
package org.ruoyi.knowledge.chain.split;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import java.util.List;
/**
 * Placeholder {@link TextSplitter}, presumably intended for source code (per the class name).
 *
 * <p>Not implemented yet: {@link #split(String)} returns {@code null}.
 */
@Component
@AllArgsConstructor
@Slf4j
public class CodeTextSplitter implements TextSplitter{
/**
 * Not implemented.
 *
 * @param content ignored
 * @return always {@code null}
 */
@Override
public List<String> split(String content) {
return null;
}
}

View File

@@ -0,0 +1,17 @@
package org.ruoyi.knowledge.chain.split;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import java.util.List;
/**
 * Placeholder {@link TextSplitter}, presumably intended for Markdown (per the class name).
 *
 * <p>Not implemented yet: {@link #split(String)} returns {@code null}.
 */
@Component
@AllArgsConstructor
@Slf4j
public class MarkdownTextSplitter implements TextSplitter{
/**
 * Not implemented.
 *
 * @param content ignored
 * @return always {@code null}
 */
@Override
public List<String> split(String content) {
return null;
}
}

View File

@@ -0,0 +1,30 @@
package org.ruoyi.knowledge.chain.split;
import lombok.Data;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.stereotype.Component;
/**
 * Text-splitting settings bound from properties under the {@code chain.split.chunk} prefix.
 */
@Data
@Component
@ConfigurationProperties(prefix = "chain.split.chunk")
public class SplitterProperties {
/**
 * Segment delimiter.
 */
private String endspliter;
/**
 * Segment delimiter for questions (Q&amp;A).
 */
private String qaspliter;
/**
 * Size of each text chunk.
 */
private int size;
/**
 * Number of characters shared by adjacent chunks (avoids losing information at chunk boundaries).
 */
private int overlay;
}

View File

@@ -0,0 +1,11 @@
package org.ruoyi.knowledge.chain.split;
import java.util.List;
/**
 * Text splitting: cuts a text into a list of chunks.
 */
public interface TextSplitter {
/**
 * Splits the given text into chunks.
 *
 * @param content the text to split
 * @return the chunks (some placeholder implementations return {@code null})
 */
List<String> split(String content);
}

View File

@@ -0,0 +1,17 @@
package org.ruoyi.knowledge.chain.split;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
import java.util.List;
/**
 * Placeholder {@link TextSplitter}, presumably intended for token-based splitting (per the class name).
 *
 * <p>Not implemented yet: {@link #split(String)} returns {@code null}.
 */
@Component
@AllArgsConstructor
@Slf4j
public class TokenTextSplitter implements TextSplitter{
/**
 * Not implemented.
 *
 * @param content ignored
 * @return always {@code null}
 */
@Override
public List<String> split(String content) {
return null;
}
}

View File

@@ -0,0 +1,60 @@
package org.ruoyi.knowledge.chain.vectorizer;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.ruoyi.common.chat.config.ChatConfig;
import org.ruoyi.common.chat.entity.embeddings.Embedding;
import org.ruoyi.common.chat.entity.embeddings.EmbeddingResponse;
import org.ruoyi.common.chat.openai.OpenAiStreamClient;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.List;
/**
 * {@link Vectorization} that obtains embeddings through the {@link OpenAiStreamClient}
 * provided by {@link ChatConfig}.
 */
@Component
@Slf4j
@RequiredArgsConstructor
public class OpenAiVectorization implements Vectorization {

    /** Embedding model name, read from {@code chain.vector.model}. */
    @Value("${chain.vector.model}")
    private String embeddingModel;

    /** Client used by the most recent call; refreshed from {@link ChatConfig} on every batch call. */
    @Getter
    private OpenAiStreamClient openAiStreamClient;

    private final ChatConfig chatConfig;

    /**
     * Requests embeddings for all chunks in one call and converts them to {@code double} vectors.
     *
     * @param chunkList the texts to embed
     * @return one vector per item of the embedding response, in response order
     */
    @Override
    public List<List<Double>> batchVectorization(List<String> chunkList) {
        openAiStreamClient = chatConfig.getOpenAiStreamClient();
        Embedding request = Embedding.builder()
                .input(chunkList)
                .model(embeddingModel)
                .build();
        EmbeddingResponse response = openAiStreamClient.embeddings(request);

        List<List<Double>> vectors = new ArrayList<>();
        response.getData().forEach(item -> vectors.add(toDoubleVector(item.getEmbedding())));
        return vectors;
    }

    /**
     * Embeds a single chunk by delegating to {@link #batchVectorization(List)}.
     *
     * @param chunk the text to embed
     * @return the first vector of the batch result
     */
    @Override
    public List<Double> singleVectorization(String chunk) {
        List<String> single = new ArrayList<>();
        single.add(chunk);
        return batchVectorization(single).get(0);
    }

    /** Converts one embedding from {@link BigDecimal} components to {@code Double} components. */
    private static List<Double> toDoubleVector(List<BigDecimal> embedding) {
        List<Double> converted = new ArrayList<>();
        for (BigDecimal component : embedding) {
            converted.add(component.doubleValue());
        }
        return converted;
    }
}

View File

@@ -0,0 +1,11 @@
package org.ruoyi.knowledge.chain.vectorizer;
import java.util.List;
/**
 * Vectorization: turns text chunks into embedding vectors.
 */
public interface Vectorization {
/**
 * Embeds several chunks at once.
 *
 * @param chunkList the texts to embed
 * @return the embedding vectors, presumably one per chunk in input order (verify against the implementation)
 */
List<List<Double>> batchVectorization(List<String> chunkList);
/**
 * Embeds a single chunk.
 *
 * @param chunk the text to embed
 * @return the embedding vector for the chunk
 */
List<Double> singleVectorization(String chunk);
}

View File

@@ -0,0 +1,23 @@
package org.ruoyi.knowledge.chain.vectorizer;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;
/**
 * Text vectorization factory; currently always hands out the OpenAI-based implementation.
 *
 * @author huangkh
 */
@Component
@Slf4j
public class VectorizationFactory {
private final OpenAiVectorization openAiVectorization;
/**
 * @param openAiVectorization the implementation returned by {@link #getEmbedding()}
 */
public VectorizationFactory(OpenAiVectorization openAiVectorization) {
this.openAiVectorization = openAiVectorization;
}
/**
 * @return the {@link OpenAiVectorization} instance passed to the constructor
 */
public Vectorization getEmbedding(){
return openAiVectorization;
}
}

View File

@@ -0,0 +1,28 @@
package org.ruoyi.knowledge.chain.vectorizer;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.annotation.Primary;
import org.springframework.stereotype.Component;
import java.util.List;
/**
 * Primary {@link Vectorization} bean: forwards every call to the implementation
 * chosen by {@link VectorizationFactory}.
 */
@Component
@Slf4j
@Primary
@AllArgsConstructor
public class VectorizationWrapper implements Vectorization {
    private final VectorizationFactory vectorizationFactory;

    /**
     * Forwards to the factory-selected implementation.
     *
     * @param chunkList the texts to embed
     * @return the vectors returned by the delegate
     */
    @Override
    public List<List<Double>> batchVectorization(List<String> chunkList) {
        return vectorizationFactory.getEmbedding().batchVectorization(chunkList);
    }

    /**
     * Forwards to the factory-selected implementation.
     *
     * @param chunk the text to embed
     * @return the vector returned by the delegate
     */
    @Override
    public List<Double> singleVectorization(String chunk) {
        return vectorizationFactory.getEmbedding().singleVectorization(chunk);
    }
}

View File

@@ -0,0 +1,244 @@
package org.ruoyi.knowledge.chain.vectorstore;
import io.milvus.client.MilvusServiceClient;
import io.milvus.grpc.DataType;
import io.milvus.grpc.SearchResults;
import io.milvus.param.ConnectParam;
import io.milvus.param.IndexType;
import io.milvus.param.MetricType;
import io.milvus.param.R;
import io.milvus.param.collection.CreateCollectionParam;
import io.milvus.param.collection.DropCollectionParam;
import io.milvus.param.collection.FieldType;
import io.milvus.param.collection.LoadCollectionParam;
import io.milvus.param.dml.DeleteParam;
import io.milvus.param.dml.InsertParam;
import io.milvus.param.dml.SearchParam;
import io.milvus.param.index.CreateIndexParam;
import io.milvus.param.partition.CreatePartitionParam;
import io.milvus.response.QueryResultsWrapper;
import io.milvus.response.SearchResultsWrapper;
import jakarta.annotation.PostConstruct;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
@Service
@Slf4j
public class MilvusVectorStore implements VectorStore{
@Value("${chain.vector.store.milvus.host}")
private String milvusHost;
@Value("${chain.vector.store.milvus.port}")
private Integer milvausPort;
@Value("${chain.vector.store.milvus.dimension}")
private Integer dimension;
@Value("${chain.vector.store.milvus.collection}")
private String collectionName;
private MilvusServiceClient milvusServiceClient;
@PostConstruct
public void init(){
milvusServiceClient = new MilvusServiceClient(
ConnectParam.newBuilder()
.withHost(milvusHost)
.withPort(milvausPort)
.withDatabaseName("default")
.build()
);
}
private void createSchema(String kid) {
FieldType primaryField = FieldType.newBuilder()
.withName("row_id")
.withDataType(DataType.Int64)
.withPrimaryKey(true)
.withAutoID(true)
.build();
FieldType contentField = FieldType.newBuilder()
.withName("content")
.withDataType(DataType.VarChar)
.withMaxLength(1000)
.build();
FieldType kidField = FieldType.newBuilder()
.withName("kid")
.withDataType(DataType.VarChar)
.withMaxLength(20)
.build();
FieldType docIdField = FieldType.newBuilder()
.withName("docId")
.withDataType(DataType.VarChar)
.withMaxLength(20)
.build();
FieldType fidField = FieldType.newBuilder()
.withName("fid")
.withDataType(DataType.VarChar)
.withMaxLength(20)
.build();
FieldType vectorField = FieldType.newBuilder()
.withName("fv")
.withDataType(DataType.FloatVector)
.withDimension(dimension)
.build();
CreateCollectionParam createCollectionReq = CreateCollectionParam.newBuilder()
.withCollectionName(collectionName + kid)
.withDescription("local knowledge")
.addFieldType(primaryField)
.addFieldType(contentField)
.addFieldType(kidField)
.addFieldType(docIdField)
.addFieldType(fidField)
.addFieldType(vectorField)
.build();
milvusServiceClient.createCollection(createCollectionReq);
// 创建向量的索引
IndexType INDEX_TYPE = IndexType.IVF_FLAT;
String INDEX_PARAM = "{\"nlist\":1024}";
milvusServiceClient.createIndex(
CreateIndexParam.newBuilder()
.withCollectionName(collectionName + kid)
.withFieldName("fv")
.withIndexType(INDEX_TYPE)
.withMetricType(MetricType.IP)
.withExtraParam(INDEX_PARAM)
.withSyncMode(Boolean.FALSE)
.build()
);
}
@Override
public void newSchema(String kid) {
createSchema(kid);
}
@Override
public void removeByKidAndFid(String kid, String fid) {
milvusServiceClient.delete(
DeleteParam.newBuilder()
.withCollectionName(collectionName + kid)
.withExpr("fid == " + fid)
.build()
);
}
@Override
public void storeEmbeddings(List<String> chunkList, List<List<Double>> vectorList, String kid, String docId, List<String> fidList) {
if (StringUtils.isNotBlank(docId)){
milvusServiceClient.createPartition(
CreatePartitionParam.newBuilder()
.withCollectionName(collectionName + kid)
.withPartitionName(docId)
.build()
);
}
List<List<Float>> vectorFloatList = new ArrayList<>();
List<String> kidList = new ArrayList<>();
List<String> docIdList = new ArrayList<>();
for (int i = 0; i < chunkList.size(); i++) {
List<Double> vector = vectorList.get(i);
List<Float> vfList = new ArrayList<>();
for (int j = 0; j < vector.size(); j++) {
Double value = vector.get(j);
vfList.add(value.floatValue());
}
vectorFloatList.add(vfList);
kidList.add(kid);
docIdList.add(docId);
}
List<InsertParam.Field> fields = new ArrayList<>();
fields.add(new InsertParam.Field("content", chunkList));
fields.add(new InsertParam.Field("kid", kidList));
fields.add(new InsertParam.Field("docId", docIdList));
fields.add(new InsertParam.Field("fid", fidList));
fields.add(new InsertParam.Field("fv", vectorFloatList));
InsertParam insertParam = InsertParam.newBuilder()
.withCollectionName(collectionName + kid)
.withPartitionName(docId)
.withFields(fields)
.build();
milvusServiceClient.insert(insertParam);
// milvus在将数据装载到内存后才能进行向量计算
milvusServiceClient.loadCollection(LoadCollectionParam.newBuilder().withCollectionName(collectionName + kid).build());
}
@Override
public void removeByDocId(String kid, String docId) {
milvusServiceClient.delete(
DeleteParam.newBuilder()
.withCollectionName(collectionName + kid)
.withExpr("1 == 1")
.withPartitionName(docId)
.build()
);
}
@Override
public void removeByKid(String kid) {
milvusServiceClient.dropCollection(
DropCollectionParam.newBuilder()
.withCollectionName(collectionName + kid)
.build()
);
}
@Override
public List<String> nearest(List<Double> queryVector, String kid) {
List<String> search_output_fields = Arrays.asList("content","fv");
List<Float> fv = new ArrayList<>();
for (int i = 0; i < queryVector.size(); i++) {
fv.add(queryVector.get(i).floatValue());
}
List<List<Float>> vectors = new ArrayList<>();
vectors.add(fv);
String search_param = "{\"nprobe\":10, \"offset\":0}";
SearchParam searchParam = SearchParam.newBuilder()
.withCollectionName(collectionName + kid)
.withMetricType(MetricType.IP)
.withOutFields(search_output_fields)
.withTopK(10)
.withVectors(vectors)
.withVectorFieldName("fv")
.withParams(search_param)
.build();
R<SearchResults> respSearch = milvusServiceClient.search(searchParam);
SearchResultsWrapper wrapperSearch = new SearchResultsWrapper(respSearch.getData().getResults());
List<QueryResultsWrapper.RowRecord> rowRecords = wrapperSearch.getRowRecords();
List<String> resultList = new ArrayList<>();
if (resultList!=null && resultList.size() > 0){
for (int i = 0; i < rowRecords.size(); i++) {
String content = rowRecords.get(i).get("content").toString();
resultList.add(content);
}
}
return resultList;
}
/**
* milvus 不支持通过文本检索相似性
* @param query
* @param kid
* @return
*/
@Override
public List<String> nearest(String query, String kid) {
return null;
}
}

View File

@@ -0,0 +1,18 @@
package org.ruoyi.knowledge.chain.vectorstore;
import java.util.List;
/**
 * Vector storage: persists chunk embeddings per knowledge base and searches them.
 */
public interface VectorStore {
/**
 * Stores chunks together with their vectors.
 *
 * @param chunkList chunk texts
 * @param vectorList vectors, indexed in parallel with {@code chunkList}
 * @param kid knowledge base id
 * @param docId document id
 * @param fidList fragment ids, indexed in parallel with {@code chunkList}
 */
void storeEmbeddings(List<String> chunkList,List<List<Double>> vectorList, String kid, String docId,List<String> fidList);
/**
 * Removes the stored data of one document.
 *
 * @param kid knowledge base id
 * @param docId document id
 */
void removeByDocId(String kid,String docId);
/**
 * Removes the stored data of a whole knowledge base.
 *
 * @param kid knowledge base id
 */
void removeByKid(String kid);
/**
 * Searches by query vector.
 *
 * @param queryVector the query embedding
 * @param kid knowledge base id
 * @return the content of the nearest chunks
 */
List<String> nearest(List<Double> queryVector,String kid);
/**
 * Searches by query text.
 *
 * @param query the query text
 * @param kid knowledge base id
 * @return the content of the nearest chunks; implementations without text search may return {@code null}
 */
List<String> nearest(String query,String kid);
/**
 * Creates the storage schema for a new knowledge base.
 *
 * @param kid knowledge base id
 */
void newSchema(String kid);
/**
 * Removes the stored data of one fragment.
 *
 * @param kid knowledge base id
 * @param fid fragment id
 */
void removeByKidAndFid(String kid, String fid);
}

View File

@@ -0,0 +1,31 @@
package org.ruoyi.knowledge.chain.vectorstore;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
@Component
@Slf4j
public class VectorStoreFactory {
@Value("${chain.vector.store.type}")
private String type;
private final WeaviateVectorStore weaviateVectorStore;
private final MilvusVectorStore milvusVectorStore;
public VectorStoreFactory(WeaviateVectorStore weaviateVectorStore, MilvusVectorStore milvusVectorStore) {
this.weaviateVectorStore = weaviateVectorStore;
this.milvusVectorStore = milvusVectorStore;
}
public VectorStore getVectorStore(){
if ("weaviate".equals(type)){
return weaviateVectorStore;
}else if ("milvus".equals(type)){
return milvusVectorStore;
}
return null;
}
}

View File

@@ -0,0 +1,58 @@
package org.ruoyi.knowledge.chain.vectorstore;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.annotation.Primary;
import org.springframework.stereotype.Service;
import java.util.List;
/**
 * Primary {@link VectorStore} bean: every call is forwarded to the store that
 * {@link VectorStoreFactory} selects at call time.
 */
@Service
@Slf4j
@Primary
@AllArgsConstructor
public class VectorStoreWrapper implements VectorStore {
    private final VectorStoreFactory vectorStoreFactory;

    /** Forwards to the selected store. */
    @Override
    public void storeEmbeddings(List<String> chunkList, List<List<Double>> vectorList, String kid, String docId, List<String> fidList) {
        vectorStoreFactory.getVectorStore().storeEmbeddings(chunkList, vectorList, kid, docId, fidList);
    }

    /** Forwards to the selected store. */
    @Override
    public void removeByDocId(String kid, String docId) {
        vectorStoreFactory.getVectorStore().removeByDocId(kid, docId);
    }

    /** Forwards to the selected store. */
    @Override
    public void removeByKid(String kid) {
        vectorStoreFactory.getVectorStore().removeByKid(kid);
    }

    /** Forwards to the selected store and returns its result. */
    @Override
    public List<String> nearest(List<Double> queryVector, String kid) {
        return vectorStoreFactory.getVectorStore().nearest(queryVector, kid);
    }

    /** Forwards to the selected store and returns its result. */
    @Override
    public List<String> nearest(String query, String kid) {
        return vectorStoreFactory.getVectorStore().nearest(query, kid);
    }

    /** Forwards to the selected store. */
    @Override
    public void newSchema(String kid) {
        vectorStoreFactory.getVectorStore().newSchema(kid);
    }

    /** Forwards to the selected store. */
    @Override
    public void removeByKidAndFid(String kid, String fid) {
        vectorStoreFactory.getVectorStore().removeByKidAndFid(kid, fid);
    }
}

View File

@@ -0,0 +1,372 @@
package org.ruoyi.knowledge.chain.vectorstore;
import cn.hutool.core.lang.UUID;
import com.alibaba.fastjson2.JSONObject;
import com.google.gson.internal.LinkedTreeMap;
import io.weaviate.client.Config;
import io.weaviate.client.WeaviateClient;
import io.weaviate.client.base.Result;
import io.weaviate.client.v1.data.model.WeaviateObject;
import io.weaviate.client.v1.data.replication.model.ConsistencyLevel;
import io.weaviate.client.v1.filters.Operator;
import io.weaviate.client.v1.filters.WhereFilter;
import io.weaviate.client.v1.graphql.model.GraphQLResponse;
import io.weaviate.client.v1.graphql.query.argument.NearTextArgument;
import io.weaviate.client.v1.graphql.query.argument.NearVectorArgument;
import io.weaviate.client.v1.graphql.query.fields.Field;
import io.weaviate.client.v1.misc.model.Meta;
import io.weaviate.client.v1.misc.model.ReplicationConfig;
import io.weaviate.client.v1.misc.model.ShardingConfig;
import io.weaviate.client.v1.misc.model.VectorIndexConfig;
import io.weaviate.client.v1.schema.model.DataType;
import io.weaviate.client.v1.schema.model.Property;
import io.weaviate.client.v1.schema.model.Schema;
import io.weaviate.client.v1.schema.model.WeaviateClass;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.ruoyi.knowledge.chain.retrieve.PromptRetrieverProperties;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@Service
@Slf4j
public class WeaviateVectorStore implements VectorStore{
@Value("${chain.vector.store.weaviate.protocol}")
private String protocol;
@Value("${chain.vector.store.weaviate.host}")
private String host;
@Value("${chain.vector.store.weaviate.classname}")
private String className;
private final PromptRetrieverProperties promptRetrieverProperties;
/**
 * Creates the store and keeps the given retriever properties in a field.
 *
 * @param promptRetrieverProperties retrieval settings
 */
public WeaviateVectorStore(PromptRetrieverProperties promptRetrieverProperties) {
this.promptRetrieverProperties = promptRetrieverProperties;
}
/**
 * Builds a new Weaviate client from the configured protocol and host.
 *
 * @return a new client instance on every call
 */
public WeaviateClient getClient() {
    return new WeaviateClient(new Config(protocol, host));
}
/**
 * Fetches the server meta information and logs it.
 *
 * @return the raw result of the meta request, including any error
 */
public Result<Meta> getMeta() {
    WeaviateClient client = getClient();
    Result<Meta> meta = client.misc().metaGetter().run();
    // Report through the class logger instead of writing to standard output.
    if (meta.getError() == null) {
        log.info("meta.hostname: {}", meta.getResult().getHostname());
        log.info("meta.version: {}", meta.getResult().getVersion());
        log.info("meta.modules: {}", meta.getResult().getModules());
    } else {
        log.error("Error: {}", meta.getError().getMessages());
    }
    return meta;
}
/**
 * Fetches the server schema and logs it.
 *
 * @return the raw result of the schema request, including any error
 */
public Result<Schema> getSchemas() {
    WeaviateClient client = getClient();
    Result<Schema> result = client.schema().getter().run();
    // Report through the class logger instead of writing to standard output.
    if (result.hasErrors()) {
        log.error("Failed to fetch Weaviate schema: {}", result.getError());
    } else {
        log.info("Weaviate schema: {}", result.getResult());
    }
    return result;
}
/**
 * Creates the Weaviate class {@code className + kid} for a knowledge base.
 *
 * <p>The class uses an HNSW index with cosine distance and the {@code text2vec-transformers}
 * vectorizer; only the {@code content} property is vectorized, the id-like properties are skipped.
 *
 * @param kid knowledge base id, appended to the configured class name
 * @return the raw result of the class creation, including any error
 */
public Result<Boolean> createSchema(String kid) {
    WeaviateClient client = getClient();

    VectorIndexConfig vectorIndexConfig = VectorIndexConfig.builder()
            .distance("cosine")
            .cleanupIntervalSeconds(300)
            .efConstruction(128)
            .maxConnections(64)
            .vectorCacheMaxObjects(500000L)
            .ef(-1)
            .skip(false)
            .dynamicEfFactor(8)
            .dynamicEfMax(500)
            .dynamicEfMin(100)
            .flatSearchCutoff(40000)
            .build();

    ShardingConfig shardingConfig = ShardingConfig.builder()
            .desiredCount(3)
            .desiredVirtualCount(128)
            .function("murmur3")
            .key("_id")
            .strategy("hash")
            .virtualPerPhysical(128)
            .build();

    ReplicationConfig replicationConfig = ReplicationConfig.builder()
            .factor(1)
            .build();

    JSONObject classModuleConfigValue = new JSONObject();
    classModuleConfigValue.put("vectorizeClassName", false);
    JSONObject classModuleConfig = new JSONObject();
    classModuleConfig.put("text2vec-transformers", classModuleConfigValue);

    // Module config for properties the vectorizer must ignore.
    JSONObject propertyModuleConfigValueSkipTrue = new JSONObject();
    propertyModuleConfigValueSkipTrue.put("vectorizePropertyName", false);
    propertyModuleConfigValueSkipTrue.put("skip", true);
    JSONObject propertyModuleConfigSkipTrue = new JSONObject();
    propertyModuleConfigSkipTrue.put("text2vec-transformers", propertyModuleConfigValueSkipTrue);

    // Module config for properties the vectorizer must include.
    JSONObject propertyModuleConfigValueSkipFalse = new JSONObject();
    propertyModuleConfigValueSkipFalse.put("vectorizePropertyName", false);
    propertyModuleConfigValueSkipFalse.put("skip", false);
    JSONObject propertyModuleConfigSkipFalse = new JSONObject();
    propertyModuleConfigSkipFalse.put("text2vec-transformers", propertyModuleConfigValueSkipFalse);

    // Plain typed list instead of raw-typed double-brace initialisers, which create an
    // anonymous ArrayList subclass holding a reference to this instance.
    List<Property> properties = new ArrayList<>();
    properties.add(buildTextProperty("content",
            "The content of the local knowledge,for search", propertyModuleConfigSkipFalse));
    properties.add(buildTextProperty("kid",
            "The knowledge id of the local knowledge,for search", propertyModuleConfigSkipTrue));
    properties.add(buildTextProperty("docId",
            "The doc id of the local knowledge,for search", propertyModuleConfigSkipTrue));
    properties.add(buildTextProperty("fid",
            "The fragment id of the local knowledge,for search", propertyModuleConfigSkipTrue));
    properties.add(buildTextProperty("uuid",
            "The uuid id of the local knowledge fragment(same with id properties),for search",
            propertyModuleConfigSkipTrue));

    WeaviateClass clazz = WeaviateClass.builder()
            .className(className + kid)
            .description("local knowledge")
            .vectorIndexType("hnsw")
            .vectorizer("text2vec-transformers")
            .shardingConfig(shardingConfig)
            .vectorIndexConfig(vectorIndexConfig)
            .replicationConfig(replicationConfig)
            .moduleConfig(classModuleConfig)
            .properties(properties)
            .build();

    Result<Boolean> result = client.schema().classCreator().withClass(clazz).run();
    // Report through the class logger instead of writing to standard output.
    if (result.hasErrors()) {
        log.error("Failed to create Weaviate class {}: {}", className + kid, result.getError());
    }
    log.info("Create Weaviate class {} result: {}", className + kid, result.getResult());
    return result;
}

/** Builds a single TEXT property with the given name, description and module config. */
private Property buildTextProperty(String name, String description, JSONObject moduleConfig) {
    List<String> dataType = new ArrayList<>();
    dataType.add(DataType.TEXT);
    return Property.builder()
            .dataType(dataType)
            .name(name)
            .description(description)
            .moduleConfig(moduleConfig)
            .build();
}
/**
 * Creates the Weaviate class for a new knowledge base by delegating to
 * {@link #createSchema(String)}; the creation result is discarded.
 *
 * @param kid knowledge base id
 */
@Override
public void newSchema(String kid) {
createSchema(kid);
}
/**
 * Deletes the objects of a knowledge base whose {@code fid} equals the given fragment id.
 *
 * <p>The matching objects are looked up through a GraphQL query and then deleted one by one.
 * A failed query or a response without rows is logged and nothing is deleted.
 *
 * @param kid knowledge base id, appended to the configured class name
 * @param fid fragment id to delete
 */
@Override
public void removeByKidAndFid(String kid, String fid) {
    WeaviateClient client = getClient();
    String targetClass = className + kid;

    Field fieldId = Field.builder().name("uuid").build();
    WhereFilter where = WhereFilter.builder()
            .path(new String[]{ "fid" })
            .operator(Operator.Equal)
            .valueString(fid)
            .build();
    // NOTE(review): no limit is set, so presumably only the server's default page of results
    // is returned and deleted per call; confirm for fragments with many objects.
    Result<GraphQLResponse> result = client.graphQL().get()
            .withClassName(targetClass)
            .withFields(fieldId)
            .withWhere(where)
            .run();
    if (result.hasErrors() || result.getResult() == null) {
        log.error("Failed to query {} for fid {}: {}", targetClass, fid, result.getError());
        return;
    }

    // Expected response shape: { "Get": { "<class>": [ { "uuid": ... }, ... ] } }.
    // Each level is checked so a missing class or an error response cannot cause an NPE.
    Object data = result.getResult().getData();
    Object classes = data instanceof Map ? ((Map<?, ?>) data).get("Get") : null;
    Object rows = classes instanceof Map ? ((Map<?, ?>) classes).get(targetClass) : null;
    if (!(rows instanceof List)) {
        log.warn("No rows returned from {} for fid {}", targetClass, fid);
        return;
    }

    List<String> uuids = new ArrayList<>();
    for (Object row : (List<?>) rows) {
        Object uuid = row instanceof Map ? ((Map<?, ?>) row).get("uuid") : null;
        if (uuid != null) {
            uuids.add(uuid.toString());
        }
    }

    for (String uuid : uuids) {
        Result<Boolean> deleteResult = client.data().deleter()
                .withID(uuid)
                .withClassName(targetClass)
                .withConsistencyLevel(ConsistencyLevel.ALL) // default QUORUM
                .run();
        if (deleteResult.hasErrors()) {
            log.warn("Failed to delete object {} from {}: {}", uuid, targetClass, deleteResult.getError());
        }
    }
}
/**
 * Stores each chunk with its vector as one Weaviate object of class {@code className + kid}.
 *
 * <p>Every object gets a freshly generated UUID, used both as object id and as the
 * {@code uuid} property. Failed inserts are logged and the remaining chunks are still processed.
 *
 * @param chunkList  chunk texts
 * @param vectorList one vector per chunk, same order as {@code chunkList}; when {@code null}
 *                   nothing is stored
 * @param kid        knowledge base id
 * @param docId      document id stored with every chunk
 * @param fidList    fragment ids, one per chunk
 */
@Override
public void storeEmbeddings(List<String> chunkList, List<List<Double>> vectorList, String kid, String docId, List<String> fidList) {
    if (vectorList == null) {
        // Without vectors no object was ever written; make that visible instead of looping silently.
        log.warn("No vectors supplied, nothing stored for class {}", className + kid);
        return;
    }
    WeaviateClient client = getClient();
    String targetClass = className + kid;
    for (int i = 0; i < chunkList.size(); i++) {
        // Weaviate expects a Float[] vector, so narrow every component.
        List<Double> vector = vectorList.get(i);
        Float[] vf = new Float[vector.size()];
        for (int j = 0; j < vector.size(); j++) {
            vf[j] = vector.get(j).floatValue();
        }

        String uuid = UUID.randomUUID(true).toString();
        Map<String, Object> dataSchema = new HashMap<>();
        dataSchema.put("content", chunkList.get(i));
        dataSchema.put("kid", kid);
        dataSchema.put("docId", docId);
        dataSchema.put("fid", fidList.get(i));
        dataSchema.put("uuid", uuid);

        Result<WeaviateObject> result = client.data().creator()
                .withClassName(targetClass)
                .withID(uuid)
                .withVector(vf)
                .withProperties(dataSchema)
                .run();
        if (result.hasErrors()) {
            log.error("Failed to store chunk {} in {}: {}", i, targetClass, result.getError());
        }
    }
}
/**
 * Deletes every stored object of knowledge base {@code kid} whose {@code docId}
 * property equals the given document id.
 * <p>
 * The ids of the matching objects are looked up with a GraphQL {@code Get} query on the
 * {@code uuid} property, then each object is deleted individually.
 *
 * @param kid   knowledge base id; appended to {@code className} to form the target class name
 * @param docId document id to match
 * @throws IllegalStateException if the lookup query fails or carries no data
 */
@Override
public void removeByDocId(String kid,String docId) {
    WeaviateClient client = getClient();
    String targetClass = className + kid;
    WhereFilter where = WhereFilter.builder()
            .path(new String[]{ "docId" })
            .operator(Operator.Equal)
            .valueString(docId)
            .build();
    Result<GraphQLResponse> result = client.graphQL().get()
            .withClassName(targetClass)
            .withFields(Field.builder().name("uuid").build())
            .withWhere(where)
            .run();
    // Fail with a descriptive error instead of the NullPointerException / ClassCastException
    // that unchecked casts on a failed response would raise.
    if (result == null || result.hasErrors() || result.getResult() == null
            || !(result.getResult().getData() instanceof Map)) {
        throw new IllegalStateException("Failed to query objects to delete, class = " + targetClass
                + ", docId = " + docId + ", error = " + (result == null ? null : result.getError()));
    }
    Object getSection = ((Map<?, ?>) result.getResult().getData()).get("Get");
    Object rows = getSection instanceof Map ? ((Map<?, ?>) getSection).get(targetClass) : null;
    if (!(rows instanceof List)) {
        // The response holds no row list for this class: nothing to delete.
        return;
    }
    // Collect all ids before issuing any delete, as the lookup and the deletes are separate requests.
    List<String> uuids = new ArrayList<>();
    for (Object row : (List<?>) rows) {
        Object uuid = row instanceof Map ? ((Map<?, ?>) row).get("uuid") : null;
        if (uuid != null) {
            uuids.add(uuid.toString());
        }
    }
    for (String uuid : uuids) {
        Result<Boolean> deleteResult = client.data().deleter()
                .withID(uuid)
                .withClassName(targetClass)
                .withConsistencyLevel(ConsistencyLevel.ALL) // default QUORUM
                .run();
        // A failed delete is reported but does not stop the remaining deletes.
        if (deleteResult == null || deleteResult.hasErrors()) {
            log.error("delete object failed, class = {}, uuid = {}, error = {}",
                    targetClass, uuid, deleteResult == null ? null : deleteResult.getError());
        }
    }
}
/**
 * Drops the whole class (schema and its objects) that belongs to knowledge base {@code kid}.
 * The outcome is only logged; a failure does not raise an exception.
 *
 * @param kid knowledge base id; appended to {@code className} to form the class name to delete
 */
@Override
public void removeByKid(String kid) {
    WeaviateClient client = getClient();
    Result<Boolean> result = client.schema().classDeleter().withClassName(className + kid).run();
    // Report through the class logger rather than stdout so the outcome reaches the application log.
    if (result.hasErrors()) {
        log.error("删除schema失败{}", result.getError());
    } else {
        log.info("删除schema成功{}", result.getResult());
    }
    log.info("drop schema by kid, result = {}",result);
}
/**
 * Runs a near-vector search in the class of knowledge base {@code kid} and returns the
 * {@code content} property of each hit.
 *
 * @param queryVector query embedding; every component is narrowed to {@code float}
 * @param kid         knowledge base id; a blank value returns an empty list without querying
 * @return contents of the hits in response order; the query is limited to
 *         {@code promptRetrieverProperties.getLimits()} results
 * @throws IllegalStateException if the query fails or carries no data
 */
@Override
public List<String> nearest(List<Double> queryVector,String kid) {
    if (StringUtils.isBlank(kid)){
        return new ArrayList<String>();
    }
    Float[] vf = new Float[queryVector.size()];
    for (int j = 0; j < vf.length; j++) {
        vf[j] = queryVector.get(j).floatValue();
    }
    WeaviateClient client = getClient();
    String targetClass = className + kid;
    Field contentField = Field.builder().name("content").build();
    Field _additional = Field.builder()
            .name("_additional")
            .fields(new Field[]{
                    Field.builder().name("distance").build()
            }).build();
    NearVectorArgument nearVector = NearVectorArgument.builder()
            .vector(vf)
            .distance(1.6f) // certainty = 1f - distance /2f
            .build();
    Result<GraphQLResponse> result = client.graphQL().get()
            .withClassName(targetClass)
            .withFields(contentField,_additional)
            .withNearVector(nearVector)
            .withLimit(promptRetrieverProperties.getLimits())
            .run();
    // Fail with a descriptive error instead of the NullPointerException / ClassCastException
    // that unchecked casts on a failed response would raise.
    if (result == null || result.hasErrors() || result.getResult() == null
            || !(result.getResult().getData() instanceof Map)) {
        throw new IllegalStateException("Near-vector query failed, class = " + targetClass
                + ", error = " + (result == null ? null : result.getError()));
    }
    List<String> resultList = new ArrayList<>();
    Object getSection = ((Map<?, ?>) result.getResult().getData()).get("Get");
    Object rows = getSection instanceof Map ? ((Map<?, ?>) getSection).get(targetClass) : null;
    if (rows instanceof List) {
        for (Object row : (List<?>) rows) {
            Object content = row instanceof Map ? ((Map<?, ?>) row).get("content") : null;
            // Hits without a content value are skipped rather than failing the whole search.
            if (content != null) {
                resultList.add(content.toString());
            }
        }
    }
    return resultList;
}
/**
 * Runs a near-text search in the class of knowledge base {@code kid} and returns the
 * {@code content} property of each hit.
 *
 * @param query free-text query, sent as the single near-text concept
 * @param kid   knowledge base id; a blank value returns an empty list without querying
 * @return contents of the hits in response order; the query is limited to
 *         {@code promptRetrieverProperties.getLimits()} results
 * @throws IllegalStateException if the query fails or carries no data
 */
@Override
public List<String> nearest(String query,String kid) {
    if (StringUtils.isBlank(kid)){
        return new ArrayList<String>();
    }
    WeaviateClient client = getClient();
    String targetClass = className + kid;
    Field contentField = Field.builder().name("content").build();
    Field _additional = Field.builder()
            .name("_additional")
            .fields(new Field[]{
                    Field.builder().name("distance").build()
            }).build();
    NearTextArgument nearText = client.graphQL().arguments().nearTextArgBuilder()
            .concepts(new String[]{ query })
            .distance(1.6f) // certainty = 1f - distance /2f
            .build();
    Result<GraphQLResponse> result = client.graphQL().get()
            .withClassName(targetClass)
            .withFields(contentField,_additional)
            .withNearText(nearText)
            .withLimit(promptRetrieverProperties.getLimits())
            .run();
    // Fail with a descriptive error instead of the NullPointerException / ClassCastException
    // that unchecked casts on a failed response would raise.
    if (result == null || result.hasErrors() || result.getResult() == null
            || !(result.getResult().getData() instanceof Map)) {
        throw new IllegalStateException("Near-text query failed, class = " + targetClass
                + ", error = " + (result == null ? null : result.getError()));
    }
    List<String> resultList = new ArrayList<>();
    Object getSection = ((Map<?, ?>) result.getResult().getData()).get("Get");
    Object rows = getSection instanceof Map ? ((Map<?, ?>) getSection).get(targetClass) : null;
    if (rows instanceof List) {
        for (Object row : (List<?>) rows) {
            Object content = row instanceof Map ? ((Map<?, ?>) row).get("content") : null;
            // Hits without a content value are skipped rather than failing the whole search.
            if (content != null) {
                resultList.add(content.toString());
            }
        }
    }
    return resultList;
}
/**
 * Drops the class that belongs to knowledge base {@code kid} and hands the raw client
 * result back to the caller.
 *
 * @param kid knowledge base id; appended to {@code className} to form the class name to delete
 * @return the client result, including any error it reported
 */
public Result<Boolean> deleteSchema(String kid) {
    WeaviateClient client = getClient();
    Result<Boolean> result = client.schema().classDeleter().withClassName(className+ kid).run();
    // Report through the class logger rather than stdout so the outcome reaches the application log.
    if (result.hasErrors()) {
        log.error("delete schema failed, kid = {}, error = {}", kid, result.getError());
    } else {
        log.info("delete schema finished, kid = {}, result = {}", kid, result.getResult());
    }
    return result;
}
}

View File

@@ -0,0 +1,91 @@
package org.ruoyi.knowledge.constant;
/**
 * File-type constants (lower-case extension strings) and case-insensitive predicates
 * that group them into text, code, Markdown, Word and PDF categories.
 * <p>
 * All predicates are null-safe: a {@code null} type belongs to no category.
 * <p>
 * NOTE(review): {@link #RUBY}, {@link #PERL} and {@link #SHELL} hold language names rather
 * than the usual {@code rb}/{@code pl}/{@code sh} extensions — confirm this is intended.
 */
public class FileType {
    public static final String TXT = "txt";
    public static final String CSV = "csv";
    public static final String MD = "md";
    public static final String DOC = "doc";
    public static final String DOCX = "docx";
    public static final String PDF = "pdf";
    public static final String LOG = "log";
    public static final String XML = "xml";
    public static final String JAVA = "java";
    public static final String HTML = "html";
    public static final String HTM = "htm";
    public static final String CSS = "css";
    public static final String JS = "js";
    public static final String PY = "py";
    public static final String CPP = "cpp";
    public static final String SQL = "sql";
    public static final String PHP = "php";
    public static final String RUBY = "ruby";
    public static final String C = "c";
    public static final String H = "h";
    public static final String HPP = "hpp";
    public static final String SWIFT = "swift";
    public static final String TS = "ts";
    public static final String RUST = "rs";
    public static final String PERL = "perl";
    public static final String SHELL = "shell";
    public static final String BAT = "bat";
    public static final String CMD = "cmd";
    public static final String PROPERTIES = "properties";
    public static final String INI = "ini";
    public static final String YAML = "yaml";
    public static final String YML = "yml";

    /**
     * @param type file type to test, may be {@code null}
     * @return {@code true} for txt, csv, properties, ini, yaml, yml, log and xml (ignoring case)
     */
    public static boolean isTextFile(String type) {
        return matchesAny(type, TXT, CSV, PROPERTIES, INI, YAML, YML, LOG, XML);
    }

    /**
     * @param type file type to test, may be {@code null}
     * @return {@code true} when the type is one of the source-code types listed below (ignoring case)
     */
    public static boolean isCodeFile(String type) {
        return matchesAny(type,
                JAVA, HTML, HTM, JS, PY,
                CPP, SQL, PHP, RUBY,
                C, H, HPP, SWIFT,
                TS, RUST, PERL, SHELL,
                BAT, CMD, CSS);
    }

    /**
     * @param type file type to test, may be {@code null}
     * @return {@code true} for md (ignoring case)
     */
    public static boolean isMdFile(String type) {
        return matchesAny(type, MD);
    }

    /**
     * @param type file type to test, may be {@code null}
     * @return {@code true} for doc and docx (ignoring case)
     */
    public static boolean isWord(String type) {
        return matchesAny(type, DOC, DOCX);
    }

    /**
     * @param type file type to test, may be {@code null}
     * @return {@code true} for pdf (ignoring case)
     */
    public static boolean isPdf(String type) {
        return matchesAny(type, PDF);
    }

    /** Returns whether {@code type} equals any candidate ignoring case; {@code null} matches nothing. */
    private static boolean matchesAny(String type, String... candidates) {
        if (type == null) {
            return false;
        }
        for (String candidate : candidates) {
            // Calling equalsIgnoreCase on the constant keeps the comparison safe for any argument.
            if (candidate.equalsIgnoreCase(type)) {
                return true;
            }
        }
        return false;
    }
}

View File

@@ -0,0 +1,60 @@
package org.ruoyi.knowledge.domain;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import lombok.Data;
import java.io.Serializable;
import java.util.Date;
/**
 * Knowledge-base attachment entity, mapped to table {@code knowledge_attach}.
 *
 * @author Lion Li
 * @date 2024-10-21
 */
@Data
@TableName("knowledge_attach")
public class KnowledgeAttach implements Serializable {

    // Pins the serialized form so that recompiling the class does not change the
    // compiler-derived default id. Fully qualified to avoid touching the import list.
    @java.io.Serial
    private static final long serialVersionUID = 1L;

    /**
     * Primary key.
     */
    @TableId(value = "id")
    private Long id;

    /**
     * Knowledge base ID.
     */
    private String kid;

    /**
     * Document ID.
     */
    private String docId;

    /**
     * Document name.
     */
    private String docName;

    /**
     * Document type.
     */
    private String docType;

    /**
     * Document content.
     */
    private String content;

    /**
     * Creator.
     */
    private String createBy;

    /**
     * Creation time.
     */
    private Date createTime;
}

View File

@@ -0,0 +1,62 @@
package org.ruoyi.knowledge.domain;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import lombok.Data;
import java.io.Serial;
import java.io.Serializable;
import java.util.Date;
/**
 * Knowledge fragment entity, mapped to table {@code knowledge_fragment}.
 *
 * @author Lion Li
 * @date 2024-10-21
 */
@Data
@TableName("knowledge_fragment")
public class KnowledgeFragment implements Serializable {
@Serial
private static final long serialVersionUID = 1L;
/**
 * Primary key.
 */
@TableId(value = "id")
private Long id;
/**
 * Knowledge base ID.
 */
private String kid;
/**
 * Document ID.
 */
private String docId;
/**
 * Knowledge fragment ID.
 */
private String fid;
/**
 * Index of the fragment.
 * NOTE(review): declared as Integer here while KnowledgeFragmentBo and KnowledgeFragmentVo
 * declare idx as Long — confirm which width is intended.
 */
private Integer idx;
/**
 * Document content.
 */
private String content;
/**
 * Creator.
 */
private String createBy;
/**
 * Creation time.
 */
private Date createTime;
}

View File

@@ -0,0 +1,63 @@
package org.ruoyi.knowledge.domain;
import com.baomidou.mybatisplus.annotation.TableId;
import com.baomidou.mybatisplus.annotation.TableName;
import lombok.Data;
import java.io.Serial;
import java.io.Serializable;
import java.util.Date;
/**
 * Knowledge base entity, mapped to table {@code knowledge_info}.
 *
 * @author Lion Li
 * @date 2024-10-21
 */
@Data
@TableName("knowledge_info")
public class KnowledgeInfo implements Serializable {
@Serial
private static final long serialVersionUID = 1L;
/**
 * Primary key.
 */
@TableId(value = "id")
private Long id;
/**
 * Knowledge base ID.
 */
private String kid;
/**
 * User ID.
 */
private Long uid;
/**
 * Knowledge base name.
 */
private String kname;
/**
 * Description.
 */
private String description;
/**
 * Creator.
 */
private String createBy;
/**
 * Creation time.
 */
private Date createTime;
}

View File

@@ -0,0 +1,56 @@
package org.ruoyi.knowledge.domain.bo;
import io.github.linpeilie.annotations.AutoMapper;
import jakarta.validation.constraints.NotBlank;
import jakarta.validation.constraints.NotNull;
import lombok.Data;
import lombok.EqualsAndHashCode;
import org.ruoyi.common.mybatis.core.domain.BaseEntity;
import org.ruoyi.knowledge.domain.KnowledgeAttach;
/**
 * Business object for knowledge-base attachments ({@code knowledge_attach});
 * auto-mapped to {@link KnowledgeAttach}.
 *
 * @author Lion Li
 * @date 2024-10-21
 */
@Data
@EqualsAndHashCode(callSuper = true)
@AutoMapper(target = KnowledgeAttach.class, reverseConvertGenerate = false)
public class KnowledgeAttachBo extends BaseEntity {
/**
 * Primary key; must not be null.
 */
@NotNull(message = "不能为空")
private Long id;
/**
 * Knowledge base ID; must not be blank.
 */
@NotBlank(message = "知识库ID不能为空")
private String kid;
/**
 * Document ID; must not be blank.
 */
@NotBlank(message = "文档ID不能为空")
private String docId;
/**
 * Document name; must not be blank.
 */
@NotBlank(message = "文档名称不能为空")
private String docName;
/**
 * Document type; must not be blank.
 */
@NotBlank(message = "文档类型不能为空")
private String docType;
/**
 * Document content; must not be blank.
 */
@NotBlank(message = "文档内容不能为空")
private String content;
}

View File

@@ -0,0 +1,59 @@
package org.ruoyi.knowledge.domain.bo;
import io.github.linpeilie.annotations.AutoMapper;
import jakarta.validation.constraints.NotBlank;
import jakarta.validation.constraints.NotNull;
import lombok.Data;
import lombok.EqualsAndHashCode;
import org.ruoyi.common.mybatis.core.domain.BaseEntity;
import org.ruoyi.knowledge.domain.KnowledgeFragment;
/**
 * Business object for knowledge fragments ({@code knowledge_fragment});
 * auto-mapped to {@link KnowledgeFragment}.
 *
 * @author Lion Li
 * @date 2024-10-21
 */
@Data
@EqualsAndHashCode(callSuper = true)
@AutoMapper(target = KnowledgeFragment.class, reverseConvertGenerate = false)
public class KnowledgeFragmentBo extends BaseEntity {
/**
 * Primary key; must not be null.
 */
@NotNull(message = "不能为空")
private Long id;
/**
 * Knowledge base ID; must not be blank.
 */
@NotBlank(message = "知识库ID不能为空")
private String kid;
/**
 * Document ID; must not be blank.
 */
@NotBlank(message = "文档ID不能为空")
private String docId;
/**
 * Knowledge fragment ID; must not be blank.
 */
@NotBlank(message = "知识片段ID不能为空")
private String fid;
/**
 * Index of the fragment; must not be null.
 */
@NotNull(message = "片段索引下标不能为空")
private Long idx;
/**
 * Document content; must not be blank.
 */
@NotBlank(message = "文档内容不能为空")
private String content;
}

View File

@@ -0,0 +1,53 @@
package org.ruoyi.knowledge.domain.bo;
import io.github.linpeilie.annotations.AutoMapper;
import jakarta.validation.constraints.NotBlank;
import jakarta.validation.constraints.NotNull;
import lombok.Data;
import lombok.EqualsAndHashCode;
import org.ruoyi.common.mybatis.core.domain.BaseEntity;
import org.ruoyi.knowledge.domain.KnowledgeInfo;
/**
 * Business object for knowledge bases ({@code knowledge_info});
 * auto-mapped to {@link KnowledgeInfo}.
 *
 * @author Lion Li
 * @date 2024-10-21
 */
@Data
@EqualsAndHashCode(callSuper = true)
@AutoMapper(target = KnowledgeInfo.class, reverseConvertGenerate = false)
public class KnowledgeInfoBo extends BaseEntity {
/**
 * Primary key; must not be null.
 */
@NotNull(message = "不能为空")
private Long id;
/**
 * Knowledge base ID; must not be blank.
 */
@NotBlank(message = "知识库ID不能为空")
private String kid;
/**
 * User ID; must not be null.
 */
@NotNull(message = "用户ID不能为空")
private Long uid;
/**
 * Knowledge base name; must not be blank.
 */
@NotBlank(message = "知识库名称不能为空")
private String kname;
/**
 * Description; must not be blank.
 */
@NotBlank(message = "描述不能为空")
private String description;
}

View File

@@ -0,0 +1,13 @@
package org.ruoyi.knowledge.domain.req;
import lombok.Data;
import org.springframework.web.multipart.MultipartFile;
/**
 * Request payload for uploading a file to a knowledge base.
 */
@Data
public class KnowledgeInfoUploadRequest {
// Knowledge base ID the upload refers to.
private String kid;
// The uploaded multipart file.
private MultipartFile file;
}

View File

@@ -0,0 +1,4 @@
package org.ruoyi.knowledge.domain;
/**
 * Empty placeholder class; it declares no members.
 * NOTE(review): the lower-case name departs from Java class-naming convention and the class
 * looks unused — confirm whether it can be removed.
 */
public class request {
}

View File

@@ -0,0 +1,63 @@
package org.ruoyi.knowledge.domain.vo;
import com.alibaba.excel.annotation.ExcelIgnoreUnannotated;
import com.alibaba.excel.annotation.ExcelProperty;
import io.github.linpeilie.annotations.AutoMapper;
import lombok.Data;
import org.ruoyi.knowledge.domain.KnowledgeAttach;
import java.io.Serial;
import java.io.Serializable;
/**
 * View object for knowledge-base attachments ({@code knowledge_attach});
 * auto-mapped to {@link KnowledgeAttach}. Only fields annotated with
 * {@code @ExcelProperty} take part in Excel export.
 *
 * @author Lion Li
 * @date 2024-10-21
 */
@Data
@ExcelIgnoreUnannotated
@AutoMapper(target = KnowledgeAttach.class)
public class KnowledgeAttachVo implements Serializable {
@Serial
private static final long serialVersionUID = 1L;
/**
 * Primary key (exported with an empty column title).
 */
@ExcelProperty(value = "")
private Long id;
/**
 * Knowledge base ID.
 */
@ExcelProperty(value = "知识库ID")
private String kid;
/**
 * Document ID.
 */
@ExcelProperty(value = "文档ID")
private String docId;
/**
 * Document name.
 */
@ExcelProperty(value = "文档名称")
private String docName;
/**
 * Document type.
 */
@ExcelProperty(value = "文档类型")
private String docType;
/**
 * Document content.
 */
@ExcelProperty(value = "文档内容")
private String content;
}

View File

@@ -0,0 +1,62 @@
package org.ruoyi.knowledge.domain.vo;
import com.alibaba.excel.annotation.ExcelIgnoreUnannotated;
import com.alibaba.excel.annotation.ExcelProperty;
import io.github.linpeilie.annotations.AutoMapper;
import lombok.Data;
import org.ruoyi.knowledge.domain.KnowledgeFragment;
import java.io.Serial;
import java.io.Serializable;
/**
 * View object for knowledge fragments ({@code knowledge_fragment});
 * auto-mapped to {@link KnowledgeFragment}. Only fields annotated with
 * {@code @ExcelProperty} take part in Excel export.
 *
 * @author Lion Li
 * @date 2024-10-21
 */
@Data
@ExcelIgnoreUnannotated
@AutoMapper(target = KnowledgeFragment.class)
public class KnowledgeFragmentVo implements Serializable {
@Serial
private static final long serialVersionUID = 1L;
/**
 * Primary key (exported with an empty column title).
 */
@ExcelProperty(value = "")
private Long id;
/**
 * Knowledge base ID.
 */
@ExcelProperty(value = "知识库ID")
private String kid;
/**
 * Document ID.
 */
@ExcelProperty(value = "文档ID")
private String docId;
/**
 * Knowledge fragment ID.
 */
@ExcelProperty(value = "知识片段ID")
private String fid;
/**
 * Index of the fragment.
 */
@ExcelProperty(value = "片段索引下标")
private Long idx;
/**
 * Document content.
 */
@ExcelProperty(value = "文档内容")
private String content;
}

View File

@@ -0,0 +1,57 @@
package org.ruoyi.knowledge.domain.vo;
import com.alibaba.excel.annotation.ExcelIgnoreUnannotated;
import com.alibaba.excel.annotation.ExcelProperty;
import io.github.linpeilie.annotations.AutoMapper;
import lombok.Data;
import org.ruoyi.knowledge.domain.KnowledgeInfo;
import java.io.Serial;
import java.io.Serializable;
/**
 * View object for knowledge bases ({@code knowledge_info});
 * auto-mapped to {@link KnowledgeInfo}. Only fields annotated with
 * {@code @ExcelProperty} take part in Excel export.
 *
 * @author Lion Li
 * @date 2024-10-21
 */
@Data
@ExcelIgnoreUnannotated
@AutoMapper(target = KnowledgeInfo.class)
public class KnowledgeInfoVo implements Serializable {
@Serial
private static final long serialVersionUID = 1L;
/**
 * Primary key (exported with an empty column title).
 */
@ExcelProperty(value = "")
private Long id;
/**
 * Knowledge base ID.
 */
@ExcelProperty(value = "知识库ID")
private String kid;
/**
 * User ID.
 */
@ExcelProperty(value = "用户ID")
private Long uid;
/**
 * Knowledge base name.
 */
@ExcelProperty(value = "知识库名称")
private String kname;
/**
 * Description.
 */
@ExcelProperty(value = "描述")
private String description;
}

View File

@@ -0,0 +1,15 @@
package org.ruoyi.knowledge.mapper;
import org.ruoyi.common.mybatis.core.mapper.BaseMapperPlus;
import org.ruoyi.knowledge.domain.KnowledgeAttach;
import org.ruoyi.knowledge.domain.vo.KnowledgeAttachVo;
/**
 * Mapper interface for knowledge-base attachments; declares no methods of its own
 * and relies on those inherited from {@link BaseMapperPlus}.
 *
 * @author Lion Li
 * @date 2024-10-21
 */
public interface KnowledgeAttachMapper extends BaseMapperPlus<KnowledgeAttach, KnowledgeAttachVo> {
}

View File

@@ -0,0 +1,15 @@
package org.ruoyi.knowledge.mapper;
import org.ruoyi.common.mybatis.core.mapper.BaseMapperPlus;
import org.ruoyi.knowledge.domain.KnowledgeFragment;
import org.ruoyi.knowledge.domain.vo.KnowledgeFragmentVo;
/**
 * Mapper interface for knowledge fragments; declares no methods of its own
 * and relies on those inherited from {@link BaseMapperPlus}.
 *
 * @author Lion Li
 * @date 2024-10-21
 */
public interface KnowledgeFragmentMapper extends BaseMapperPlus<KnowledgeFragment, KnowledgeFragmentVo> {
}

View File

@@ -0,0 +1,15 @@
package org.ruoyi.knowledge.mapper;
import org.ruoyi.common.mybatis.core.mapper.BaseMapperPlus;
import org.ruoyi.knowledge.domain.KnowledgeInfo;
import org.ruoyi.knowledge.domain.vo.KnowledgeInfoVo;
/**
 * Mapper interface for knowledge bases; declares no methods of its own
 * and relies on those inherited from {@link BaseMapperPlus}.
 *
 * @author Lion Li
 * @date 2024-10-21
 */
public interface KnowledgeInfoMapper extends BaseMapperPlus<KnowledgeInfo, KnowledgeInfoVo> {
}

View File

@@ -0,0 +1,20 @@
package org.ruoyi.knowledge.service;
import java.util.List;
/**
 * Operations for storing, removing and querying embeddings of knowledge-base content.
 */
public interface EmbeddingService {
/**
 * Stores embeddings for the given text chunks.
 *
 * @param chunkList text chunks
 * @param kid knowledge base ID
 * @param docId document ID
 * @param fidList fragment IDs, presumably one per chunk in the same order — verify against callers
 */
void storeEmbeddings(List<String> chunkList, String kid, String docId,List<String> fidList);
/**
 * Removes the stored data of one document.
 *
 * @param kid knowledge base ID
 * @param docId document ID
 */
void removeByDocId(String kid,String docId);
/**
 * Removes the stored data of one knowledge base.
 *
 * @param kid knowledge base ID
 */
void removeByKid(String kid);
/**
 * Returns the vector for a query string.
 *
 * @param query query text
 * @return the query vector
 */
List<Double> getQueryVector(String query);
/**
 * Creates the schema for one knowledge base.
 *
 * @param kid knowledge base ID
 */
void createSchema(String kid);
/**
 * Removes the stored data of one fragment.
 *
 * @param kid knowledge base ID
 * @param fid fragment ID
 */
void removeByKidAndFid(String kid, String fid);
/**
 * Stores a single fragment.
 *
 * @param kid knowledge base ID
 * @param docId document ID
 * @param fid fragment ID
 * @param content fragment text
 */
void saveFragment(String kid, String docId, String fid, String content);
}

View File

@@ -0,0 +1,56 @@
package org.ruoyi.knowledge.service;
import org.ruoyi.common.mybatis.core.page.PageQuery;
import org.ruoyi.common.mybatis.core.page.TableDataInfo;
import org.ruoyi.knowledge.domain.bo.KnowledgeAttachBo;
import org.ruoyi.knowledge.domain.vo.KnowledgeAttachVo;
import java.util.Collection;
import java.util.List;
/**
 * Service interface for knowledge-base attachments.
 *
 * @author Lion Li
 * @date 2024-10-21
 */
public interface IKnowledgeAttachService {
/**
 * Queries one attachment by primary key.
 */
KnowledgeAttachVo queryById(Long id);
/**
 * Queries a page of attachments.
 */
TableDataInfo<KnowledgeAttachVo> queryPageList(KnowledgeAttachBo bo, PageQuery pageQuery);
/**
 * Queries a list of attachments.
 */
List<KnowledgeAttachVo> queryList(KnowledgeAttachBo bo);
/**
 * Inserts an attachment.
 */
Boolean insertByBo(KnowledgeAttachBo bo);
/**
 * Updates an attachment.
 */
Boolean updateByBo(KnowledgeAttachBo bo);
/**
 * Validates and batch-deletes attachments.
 */
Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid);
/**
 * Removes the attachments of a knowledge base.
 *
 * @param kid knowledge base ID
 */
void removeKnowledgeAttach(String kid);
}

View File

@@ -0,0 +1,48 @@
package org.ruoyi.knowledge.service;
import org.ruoyi.common.mybatis.core.page.PageQuery;
import org.ruoyi.common.mybatis.core.page.TableDataInfo;
import org.ruoyi.knowledge.domain.bo.KnowledgeFragmentBo;
import org.ruoyi.knowledge.domain.vo.KnowledgeFragmentVo;
import java.util.Collection;
import java.util.List;
/**
 * Service interface for knowledge fragments.
 *
 * @author Lion Li
 * @date 2024-10-21
 */
public interface IKnowledgeFragmentService {
/**
 * Queries one fragment by primary key.
 */
KnowledgeFragmentVo queryById(Long id);
/**
 * Queries a page of fragments.
 */
TableDataInfo<KnowledgeFragmentVo> queryPageList(KnowledgeFragmentBo bo, PageQuery pageQuery);
/**
 * Queries a list of fragments.
 */
List<KnowledgeFragmentVo> queryList(KnowledgeFragmentBo bo);
/**
 * Inserts a fragment.
 */
Boolean insertByBo(KnowledgeFragmentBo bo);
/**
 * Updates a fragment.
 */
Boolean updateByBo(KnowledgeFragmentBo bo);
/**
 * Validates and batch-deletes fragments.
 */
Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid);
}

View File

@@ -0,0 +1,58 @@
package org.ruoyi.knowledge.service;
import org.ruoyi.common.mybatis.core.page.PageQuery;
import org.ruoyi.common.mybatis.core.page.TableDataInfo;
import org.ruoyi.knowledge.domain.KnowledgeAttach;
import org.ruoyi.knowledge.domain.bo.KnowledgeAttachBo;
import org.ruoyi.knowledge.domain.bo.KnowledgeInfoBo;
import org.ruoyi.knowledge.domain.req.KnowledgeInfoUploadRequest;
import org.ruoyi.knowledge.domain.vo.KnowledgeInfoVo;
import java.util.Collection;
import java.util.List;
/**
 * Service interface for knowledge bases.
 *
 * @author Lion Li
 * @date 2024-10-21
 */
public interface IKnowledgeInfoService {
/**
 * Queries one knowledge base by primary key.
 */
KnowledgeInfoVo queryById(Long id);
/**
 * Queries a page of knowledge bases.
 */
TableDataInfo<KnowledgeInfoVo> queryPageList(KnowledgeInfoBo bo, PageQuery pageQuery);
/**
 * Queries a list of knowledge bases.
 */
List<KnowledgeInfoVo> queryList(KnowledgeInfoBo bo);
/**
 * Updates a knowledge base.
 */
Boolean updateByBo(KnowledgeInfoBo bo);
/**
 * Adds a knowledge base.
 */
void saveOne(KnowledgeInfoBo bo);
/**
 * Uploads an attachment.
 */
void upload(KnowledgeInfoUploadRequest request);
/**
 * Deletes a knowledge base.
 * NOTE(review): the parameter is a String named id — confirm whether callers pass the
 * row id or the knowledge base ID (kid).
 */
void removeKnowledge(String id);
}

View File

@@ -0,0 +1,65 @@
package org.ruoyi.knowledge.service.impl;
import lombok.AllArgsConstructor;
import org.ruoyi.knowledge.chain.vectorizer.Vectorization;
import org.ruoyi.knowledge.chain.vectorstore.VectorStore;
import org.ruoyi.knowledge.service.EmbeddingService;
import org.springframework.stereotype.Service;
import java.util.ArrayList;
import java.util.List;
/**
 * {@link EmbeddingService} implementation that obtains vectors from {@link Vectorization}
 * and forwards storage and removal requests to {@link VectorStore}.
 */
@Service
@AllArgsConstructor
public class EmbeddingServiceImpl implements EmbeddingService {

    private final VectorStore vectorStore;
    private final Vectorization vectorization;

    /**
     * Vectorizes the chunks in one batch and hands chunks plus vectors to the vector store.
     *
     * @param chunkList text chunks of the document
     * @param kid       knowledge base ID
     * @param docId     document ID
     * @param fidList   fragment IDs forwarded to the vector store together with the chunks
     */
    @Override
    public void storeEmbeddings(List<String> chunkList, String kid, String docId,List<String> fidList) {
        vectorStore.storeEmbeddings(chunkList, vectorization.batchVectorization(chunkList), kid, docId, fidList);
    }

    /** Forwards to {@code VectorStore.removeByDocId}. */
    @Override
    public void removeByDocId(String kid,String docId) {
        vectorStore.removeByDocId(kid, docId);
    }

    /** Forwards to {@code VectorStore.removeByKid}. */
    @Override
    public void removeByKid(String kid) {
        vectorStore.removeByKid(kid);
    }

    /**
     * @param query query text
     * @return the vector produced by {@code Vectorization.singleVectorization} for the query
     */
    @Override
    public List<Double> getQueryVector(String query) {
        return vectorization.singleVectorization(query);
    }

    /** Forwards to {@code VectorStore.newSchema}. */
    @Override
    public void createSchema(String kid) {
        vectorStore.newSchema(kid);
    }

    /** Forwards to {@code VectorStore.removeByKidAndFid}. */
    @Override
    public void removeByKidAndFid(String kid, String fid) {
        vectorStore.removeByKidAndFid(kid, fid);
    }

    /**
     * Stores a single fragment by wrapping it in one-element lists and reusing
     * {@link #storeEmbeddings(List, String, String, List)}.
     */
    @Override
    public void saveFragment(String kid, String docId, String fid, String content) {
        List<String> singleFid = new ArrayList<>();
        singleFid.add(fid);
        List<String> singleChunk = new ArrayList<>();
        singleChunk.add(content);
        this.storeEmbeddings(singleChunk, kid, docId, singleFid);
    }
}

View File

@@ -0,0 +1,125 @@
package org.ruoyi.knowledge.service.impl;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import lombok.RequiredArgsConstructor;
import org.ruoyi.common.core.utils.MapstructUtils;
import org.ruoyi.common.core.utils.StringUtils;
import org.ruoyi.common.mybatis.core.page.PageQuery;
import org.ruoyi.common.mybatis.core.page.TableDataInfo;
import org.ruoyi.knowledge.domain.KnowledgeAttach;
import org.ruoyi.knowledge.domain.bo.KnowledgeAttachBo;
import org.ruoyi.knowledge.domain.vo.KnowledgeAttachVo;
import org.ruoyi.knowledge.mapper.KnowledgeAttachMapper;
import org.ruoyi.knowledge.mapper.KnowledgeFragmentMapper;
import org.ruoyi.knowledge.service.IKnowledgeAttachService;
import org.springframework.stereotype.Service;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Service implementation for knowledge-base attachments.
 *
 * @author Lion Li
 * @date 2024-10-21
 */
@RequiredArgsConstructor
@Service
public class KnowledgeAttachServiceImpl implements IKnowledgeAttachService {

    private final KnowledgeAttachMapper baseMapper;
    private final KnowledgeFragmentMapper fragmentMapper;

    /**
     * Queries one attachment by primary key.
     */
    @Override
    public KnowledgeAttachVo queryById(Long id){
        return baseMapper.selectVoById(id);
    }

    /**
     * Queries a page of attachments matching the non-blank fields of {@code bo}.
     */
    @Override
    public TableDataInfo<KnowledgeAttachVo> queryPageList(KnowledgeAttachBo bo, PageQuery pageQuery) {
        LambdaQueryWrapper<KnowledgeAttach> lqw = buildQueryWrapper(bo);
        Page<KnowledgeAttachVo> result = baseMapper.selectVoPage(pageQuery.build(), lqw);
        return TableDataInfo.build(result);
    }

    /**
     * Queries all attachments matching the non-blank fields of {@code bo}.
     */
    @Override
    public List<KnowledgeAttachVo> queryList(KnowledgeAttachBo bo) {
        return baseMapper.selectVoList(buildQueryWrapper(bo));
    }

    /**
     * Builds the query conditions: exact match on kid, docId, docType and content,
     * LIKE match on docName; blank fields add no condition.
     */
    private LambdaQueryWrapper<KnowledgeAttach> buildQueryWrapper(KnowledgeAttachBo bo) {
        LambdaQueryWrapper<KnowledgeAttach> lqw = Wrappers.lambdaQuery();
        lqw.eq(StringUtils.isNotBlank(bo.getKid()), KnowledgeAttach::getKid, bo.getKid());
        lqw.eq(StringUtils.isNotBlank(bo.getDocId()), KnowledgeAttach::getDocId, bo.getDocId());
        lqw.like(StringUtils.isNotBlank(bo.getDocName()), KnowledgeAttach::getDocName, bo.getDocName());
        lqw.eq(StringUtils.isNotBlank(bo.getDocType()), KnowledgeAttach::getDocType, bo.getDocType());
        lqw.eq(StringUtils.isNotBlank(bo.getContent()), KnowledgeAttach::getContent, bo.getContent());
        return lqw;
    }

    /**
     * Inserts an attachment and, on success, writes the entity's id back into {@code bo}.
     *
     * @return {@code true} when a row was inserted
     */
    @Override
    public Boolean insertByBo(KnowledgeAttachBo bo) {
        KnowledgeAttach add = MapstructUtils.convert(bo, KnowledgeAttach.class);
        validEntityBeforeSave(add);
        boolean flag = baseMapper.insert(add) > 0;
        if (flag) {
            bo.setId(add.getId());
        }
        return flag;
    }

    /**
     * Updates an attachment by id.
     *
     * @return {@code true} when a row was updated
     */
    @Override
    public Boolean updateByBo(KnowledgeAttachBo bo) {
        KnowledgeAttach update = MapstructUtils.convert(bo, KnowledgeAttach.class);
        validEntityBeforeSave(update);
        return baseMapper.updateById(update) > 0;
    }

    /**
     * Validation hook executed before insert and update; currently performs no checks.
     */
    private void validEntityBeforeSave(KnowledgeAttach entity){
        //TODO add data validation, e.g. unique constraints
    }

    /**
     * Batch-deletes attachments by id.
     *
     * @param ids     primary keys to delete
     * @param isValid whether business validation should run first; {@code null} is treated as {@code false}
     * @return {@code true} when at least one row was deleted
     */
    @Override
    public Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid) {
        // Boolean.TRUE.equals avoids the NullPointerException that unboxing a null flag would raise.
        if (Boolean.TRUE.equals(isValid)) {
            //TODO add business validation and decide whether validation is required
        }
        return baseMapper.deleteBatchIds(ids) > 0;
    }

    /**
     * Deletes the attachment rows and the fragment rows whose {@code kid} column equals the given value.
     * NOTE(review): the two deletes are issued one after the other with no transaction declared
     * on this method — confirm whether a caller provides one.
     *
     * @param kid knowledge base ID
     */
    @Override
    public void removeKnowledgeAttach(String kid) {
        Map<String, Object> condition = new HashMap<>();
        condition.put("kid", kid);
        baseMapper.deleteByMap(condition);
        fragmentMapper.deleteByMap(condition);
    }
}

View File

@@ -0,0 +1,113 @@
package org.ruoyi.knowledge.service.impl;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import lombok.RequiredArgsConstructor;
import org.ruoyi.common.core.utils.MapstructUtils;
import org.ruoyi.common.core.utils.StringUtils;
import org.ruoyi.common.mybatis.core.page.PageQuery;
import org.ruoyi.common.mybatis.core.page.TableDataInfo;
import org.ruoyi.knowledge.domain.KnowledgeFragment;
import org.ruoyi.knowledge.domain.bo.KnowledgeFragmentBo;
import org.ruoyi.knowledge.domain.vo.KnowledgeFragmentVo;
import org.ruoyi.knowledge.mapper.KnowledgeFragmentMapper;
import org.ruoyi.knowledge.service.IKnowledgeFragmentService;
import org.springframework.stereotype.Service;
import java.util.Collection;
import java.util.List;
import java.util.Map;
/**
 * Service implementation for knowledge fragments.
 *
 * @author Lion Li
 * @date 2024-10-21
 */
@RequiredArgsConstructor
@Service
public class KnowledgeFragmentServiceImpl implements IKnowledgeFragmentService {

    private final KnowledgeFragmentMapper baseMapper;

    /**
     * Queries one fragment by primary key.
     */
    @Override
    public KnowledgeFragmentVo queryById(Long id){
        return baseMapper.selectVoById(id);
    }

    /**
     * Queries a page of fragments matching the non-blank fields of {@code bo}.
     */
    @Override
    public TableDataInfo<KnowledgeFragmentVo> queryPageList(KnowledgeFragmentBo bo, PageQuery pageQuery) {
        LambdaQueryWrapper<KnowledgeFragment> lqw = buildQueryWrapper(bo);
        // The docId condition is added unconditionally here, in addition to the conditional one
        // from buildQueryWrapper. NOTE(review): this looks like a deliberate "docId is required
        // for paging" rule — confirm before removing the duplicate.
        lqw.eq(KnowledgeFragment::getDocId, bo.getDocId());
        Page<KnowledgeFragmentVo> result = baseMapper.selectVoPage(pageQuery.build(), lqw);
        return TableDataInfo.build(result);
    }

    /**
     * Queries all fragments matching the non-blank fields of {@code bo}.
     */
    @Override
    public List<KnowledgeFragmentVo> queryList(KnowledgeFragmentBo bo) {
        return baseMapper.selectVoList(buildQueryWrapper(bo));
    }

    /**
     * Builds the query conditions: exact match on kid, docId, fid, idx and content;
     * blank or null fields add no condition.
     */
    private LambdaQueryWrapper<KnowledgeFragment> buildQueryWrapper(KnowledgeFragmentBo bo) {
        LambdaQueryWrapper<KnowledgeFragment> lqw = Wrappers.lambdaQuery();
        lqw.eq(StringUtils.isNotBlank(bo.getKid()), KnowledgeFragment::getKid, bo.getKid());
        lqw.eq(StringUtils.isNotBlank(bo.getDocId()), KnowledgeFragment::getDocId, bo.getDocId());
        lqw.eq(StringUtils.isNotBlank(bo.getFid()), KnowledgeFragment::getFid, bo.getFid());
        lqw.eq(bo.getIdx() != null, KnowledgeFragment::getIdx, bo.getIdx());
        lqw.eq(StringUtils.isNotBlank(bo.getContent()), KnowledgeFragment::getContent, bo.getContent());
        return lqw;
    }

    /**
     * Inserts a fragment and, on success, writes the entity's id back into {@code bo}.
     *
     * @return {@code true} when a row was inserted
     */
    @Override
    public Boolean insertByBo(KnowledgeFragmentBo bo) {
        KnowledgeFragment add = MapstructUtils.convert(bo, KnowledgeFragment.class);
        validEntityBeforeSave(add);
        boolean flag = baseMapper.insert(add) > 0;
        if (flag) {
            bo.setId(add.getId());
        }
        return flag;
    }

    /**
     * Updates a fragment by id.
     *
     * @return {@code true} when a row was updated
     */
    @Override
    public Boolean updateByBo(KnowledgeFragmentBo bo) {
        KnowledgeFragment update = MapstructUtils.convert(bo, KnowledgeFragment.class);
        validEntityBeforeSave(update);
        return baseMapper.updateById(update) > 0;
    }

    /**
     * Validation hook executed before insert and update; currently performs no checks.
     */
    private void validEntityBeforeSave(KnowledgeFragment entity){
        //TODO add data validation, e.g. unique constraints
    }

    /**
     * Batch-deletes fragments by id.
     *
     * @param ids     primary keys to delete
     * @param isValid whether business validation should run first; {@code null} is treated as {@code false}
     * @return {@code true} when at least one row was deleted
     */
    @Override
    public Boolean deleteWithValidByIds(Collection<Long> ids, Boolean isValid) {
        // Boolean.TRUE.equals avoids the NullPointerException that unboxing a null flag would raise.
        if (Boolean.TRUE.equals(isValid)) {
            //TODO add business validation and decide whether validation is required
        }
        return baseMapper.deleteBatchIds(ids) > 0;
    }
}

View File

@@ -0,0 +1,218 @@
package org.ruoyi.knowledge.service.impl;
import cn.hutool.core.util.RandomUtil;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import io.github.ollama4j.OllamaAPI;
import io.github.ollama4j.exceptions.OllamaBaseException;
import io.github.ollama4j.models.chat.OllamaChatMessageRole;
import io.github.ollama4j.models.chat.OllamaChatRequestBuilder;
import io.github.ollama4j.models.chat.OllamaChatRequestModel;
import io.github.ollama4j.models.chat.OllamaChatResult;
import lombok.RequiredArgsConstructor;
import org.ruoyi.common.core.utils.MapstructUtils;
import org.ruoyi.common.core.utils.StringUtils;
import org.ruoyi.common.mybatis.core.page.PageQuery;
import org.ruoyi.common.mybatis.core.page.TableDataInfo;
import org.ruoyi.common.satoken.utils.LoginHelper;
import org.ruoyi.knowledge.chain.loader.ResourceLoader;
import org.ruoyi.knowledge.chain.loader.ResourceLoaderFactory;
import org.ruoyi.knowledge.domain.KnowledgeAttach;
import org.ruoyi.knowledge.domain.KnowledgeFragment;
import org.ruoyi.knowledge.domain.KnowledgeInfo;
import org.ruoyi.knowledge.domain.bo.KnowledgeAttachBo;
import org.ruoyi.knowledge.domain.bo.KnowledgeFragmentBo;
import org.ruoyi.knowledge.domain.bo.KnowledgeInfoBo;
import org.ruoyi.knowledge.domain.req.KnowledgeInfoUploadRequest;
import org.ruoyi.knowledge.domain.vo.KnowledgeInfoVo;
import org.ruoyi.knowledge.mapper.KnowledgeAttachMapper;
import org.ruoyi.knowledge.mapper.KnowledgeFragmentMapper;
import org.ruoyi.knowledge.mapper.KnowledgeInfoMapper;
import org.ruoyi.knowledge.service.EmbeddingService;
import org.ruoyi.knowledge.service.IKnowledgeAttachService;
import org.ruoyi.knowledge.service.IKnowledgeFragmentService;
import org.ruoyi.knowledge.service.IKnowledgeInfoService;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
import java.io.IOException;
import java.time.LocalDateTime;
import java.util.*;
/**
 * Service-layer implementation for knowledge bases.
 *
 * <p>Handles querying and saving {@link KnowledgeInfo} rows, ingesting uploaded documents
 * (splitting them into fragments and handing the chunks to the {@link EmbeddingService}),
 * and removing a knowledge base together with its attachments, fragments and embeddings.
 *
 * @author Lion Li
 * @date 2024-10-21
 */
@RequiredArgsConstructor
@Service
public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {

    /** Base URL of the Ollama server used by {@link #convertTextBlockToPretrainData(String)}. */
    private static final String OLLAMA_HOST = "http://localhost:11434/";

    /** Name of the Ollama chat model used by {@link #convertTextBlockToPretrainData(String)}. */
    private static final String OLLAMA_MODEL = "qwen2.5:7b";

    /** Request timeout, in seconds, applied to the Ollama client. */
    private static final int OLLAMA_TIMEOUT_SECONDS = 100;

    private final KnowledgeInfoMapper baseMapper;
    private final EmbeddingService embeddingService;
    private final ResourceLoaderFactory resourceLoaderFactory;
    private final KnowledgeFragmentMapper fragmentMapper;
    private final KnowledgeAttachMapper attachMapper;

    /**
     * Looks up a single knowledge base by its primary key.
     *
     * @param id primary key of the knowledge base row
     * @return the view object returned by the mapper for that id
     */
    @Override
    public KnowledgeInfoVo queryById(Long id) {
        return baseMapper.selectVoById(id);
    }

    /**
     * Queries knowledge bases page by page.
     *
     * @param bo        filter criteria (see {@link #buildQueryWrapper(KnowledgeInfoBo)})
     * @param pageQuery paging parameters
     * @return one page of matching knowledge bases
     */
    @Override
    public TableDataInfo<KnowledgeInfoVo> queryPageList(KnowledgeInfoBo bo, PageQuery pageQuery) {
        LambdaQueryWrapper<KnowledgeInfo> lqw = buildQueryWrapper(bo);
        Page<KnowledgeInfoVo> result = baseMapper.selectVoPage(pageQuery.build(), lqw);
        return TableDataInfo.build(result);
    }

    /**
     * Queries all knowledge bases matching the given criteria (no paging).
     *
     * @param bo filter criteria (see {@link #buildQueryWrapper(KnowledgeInfoBo)})
     * @return the matching knowledge bases
     */
    @Override
    public List<KnowledgeInfoVo> queryList(KnowledgeInfoBo bo) {
        LambdaQueryWrapper<KnowledgeInfo> lqw = buildQueryWrapper(bo);
        return baseMapper.selectVoList(lqw);
    }

    /**
     * Builds the query conditions shared by the list queries. Each condition is only applied
     * when the corresponding field of {@code bo} is set: {@code kid}, {@code uid} and
     * {@code description} are matched exactly, {@code kname} is matched with LIKE.
     *
     * @param bo filter criteria
     * @return the populated query wrapper
     */
    private LambdaQueryWrapper<KnowledgeInfo> buildQueryWrapper(KnowledgeInfoBo bo) {
        LambdaQueryWrapper<KnowledgeInfo> lqw = Wrappers.lambdaQuery();
        lqw.eq(StringUtils.isNotBlank(bo.getKid()), KnowledgeInfo::getKid, bo.getKid());
        lqw.eq(bo.getUid() != null, KnowledgeInfo::getUid, bo.getUid());
        lqw.like(StringUtils.isNotBlank(bo.getKname()), KnowledgeInfo::getKname, bo.getKname());
        lqw.eq(StringUtils.isNotBlank(bo.getDescription()), KnowledgeInfo::getDescription, bo.getDescription());
        return lqw;
    }

    /**
     * Updates a knowledge base by primary key.
     *
     * @param bo new field values, converted to a {@link KnowledgeInfo} entity
     * @return {@code true} if at least one row was updated
     */
    @Override
    public Boolean updateByBo(KnowledgeInfoBo bo) {
        KnowledgeInfo update = MapstructUtils.convert(bo, KnowledgeInfo.class);
        validEntityBeforeSave(update);
        return baseMapper.updateById(update) > 0;
    }

    /**
     * Validation hook executed before an entity is saved. Currently performs no checks.
     *
     * @param entity the entity about to be saved
     */
    private void validEntityBeforeSave(KnowledgeInfo entity) {
        // TODO add data validation, e.g. unique constraints
    }

    /**
     * Creates or updates a knowledge base.
     *
     * <p>When {@code bo} carries no {@code kid}, a random 10-character kid is generated, the
     * current login user is recorded as owner, the row is inserted and the embedding service
     * is asked to create a schema for the new kid. Otherwise the existing row is updated.
     *
     * @param bo the knowledge base data to persist
     */
    @Override
    public void saveOne(KnowledgeInfoBo bo) {
        KnowledgeInfo knowledgeInfo = MapstructUtils.convert(bo, KnowledgeInfo.class);
        if (StringUtils.isNotBlank(bo.getKid())) {
            // An existing kid means this is an update of a known knowledge base.
            baseMapper.updateById(knowledgeInfo);
            return;
        }
        String kid = RandomUtil.randomString(10);
        if (knowledgeInfo != null) {
            knowledgeInfo.setKid(kid);
            knowledgeInfo.setUid(LoginHelper.getLoginUser().getUserId());
        }
        baseMapper.insert(knowledgeInfo);
        embeddingService.createSchema(kid);
    }

    /**
     * Ingests the file carried by an upload request into the knowledge base named by the
     * request's kid.
     *
     * @param request upload request holding the file and the target kid
     */
    @Override
    public void upload(KnowledgeInfoUploadRequest request) {
        storeContent(request.getFile(), request.getKid());
    }

    /**
     * Reads an uploaded file, splits it into chunks, stores one {@link KnowledgeFragment} per
     * chunk plus one {@link KnowledgeAttach} for the whole document, and passes the chunks to
     * the embedding service.
     *
     * <p>The document type is the text after the last {@code '.'} of the original filename;
     * a filename without a dot is used as the type in its entirety.
     *
     * @param file the uploaded file
     * @param kid  id of the knowledge base the document belongs to
     * @throws IllegalArgumentException     if the upload carries no original filename
     * @throws java.io.UncheckedIOException if the uploaded file cannot be read; nothing is
     *                                      persisted in that case
     */
    public void storeContent(MultipartFile file, String kid) {
        String fileName = file.getOriginalFilename();
        if (fileName == null) {
            throw new IllegalArgumentException("Uploaded file has no original filename");
        }
        String docId = RandomUtil.randomString(10);
        String docType = fileName.substring(fileName.lastIndexOf(".") + 1);
        ResourceLoader resourceLoader = resourceLoaderFactory.getLoaderByFileType(docType);

        // Read and split first, so an unreadable upload fails before any row is written.
        String content;
        List<String> chunkList;
        try (java.io.InputStream inputStream = file.getInputStream()) {
            content = resourceLoader.getContent(inputStream);
            chunkList = resourceLoader.getChunkList(content);
        } catch (IOException e) {
            throw new java.io.UncheckedIOException("Failed to read uploaded file: " + fileName, e);
        }
        if (chunkList == null) {
            // A loader may return no chunk list at all; treat that as "no chunks".
            chunkList = new ArrayList<>();
        }

        List<String> fids = new ArrayList<>(chunkList.size());
        for (int i = 0; i < chunkList.size(); i++) {
            String fid = RandomUtil.randomString(16);
            fids.add(fid);
            KnowledgeFragment knowledgeFragment = new KnowledgeFragment();
            knowledgeFragment.setKid(kid);
            knowledgeFragment.setDocId(docId);
            knowledgeFragment.setFid(fid);
            knowledgeFragment.setIdx(i);
            // The raw chunk text is stored; pre-processing through
            // convertTextBlockToPretrainData is currently not applied.
            knowledgeFragment.setContent(chunkList.get(i));
            knowledgeFragment.setCreateTime(new Date());
            fragmentMapper.insert(knowledgeFragment);
        }

        KnowledgeAttach knowledgeAttach = new KnowledgeAttach();
        knowledgeAttach.setKid(kid);
        knowledgeAttach.setDocId(docId);
        knowledgeAttach.setDocName(fileName);
        knowledgeAttach.setDocType(docType);
        knowledgeAttach.setContent(content);
        knowledgeAttach.setCreateTime(new Date());
        attachMapper.insert(knowledgeAttach);

        embeddingService.storeEmbeddings(chunkList, kid, docId, fids);
    }

    /**
     * Removes a knowledge base and everything stored under its kid: the knowledge base row,
     * its fragments, its attachments and its vector-store data.
     *
     * @param id the kid of the knowledge base to remove
     * @throws IllegalArgumentException if {@code id} is blank, to avoid issuing deletes
     *                                  keyed on an empty or null kid
     */
    @Override
    public void removeKnowledge(String id) {
        if (StringUtils.isBlank(id)) {
            throw new IllegalArgumentException("Knowledge base id (kid) must not be blank");
        }
        Map<String, Object> map = new HashMap<>();
        map.put("kid", id);
        // Delete the knowledge base itself.
        baseMapper.deleteByMap(map);
        // Delete its fragments and attachments.
        fragmentMapper.deleteByMap(map);
        attachMapper.deleteByMap(map);
        // Delete the vector-store data.
        embeddingService.removeByKid(id);
    }

    /**
     * Converts a text chunk into pre-training data by asking an Ollama chat model to rewrite
     * it as instruction/input/output JSON.
     *
     * @param chunk the parsed text chunk
     * @return the response text returned by the model
     * @throws IllegalStateException if the Ollama request fails; the original failure is
     *                               attached as the cause
     */
    public String convertTextBlockToPretrainData(String chunk) {
        OllamaAPI ollama = new OllamaAPI(OLLAMA_HOST);
        OllamaChatRequestBuilder builder = OllamaChatRequestBuilder.getInstance(OLLAMA_MODEL);
        // Set the request timeout.
        ollama.setRequestTimeoutSeconds(OLLAMA_TIMEOUT_SECONDS);
        // Build the single user message that carries the chunk and the target format.
        String json = "instruction:用户指令,根据语义提取一个关键词;input:用户输入,根据语义提取多个关键词;output:输出文本内容";
        OllamaChatRequestModel requestModel = builder.withMessage
                (OllamaChatMessageRole.USER, "文本:"+chunk+"理解文本内容,并且将文本内容转换为:"+json+",输出JSON格式不要包含其他无关内容,内部使用无需脱敏")
            .build();
        // Send the conversation to the model.
        try {
            OllamaChatResult chatResult = ollama.chat(requestModel);
            return chatResult.getResponse();
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                // Preserve the interrupt status for code further up the stack.
                Thread.currentThread().interrupt();
            }
            throw new IllegalStateException("Failed to convert text block to pretrain data via Ollama", e);
        }
    }
}