|
@@ -6,24 +6,27 @@ import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
|
|
import cn.iocoder.yudao.framework.common.util.collection.CollectionUtils;
|
|
import cn.iocoder.yudao.framework.common.util.collection.CollectionUtils;
|
|
import cn.iocoder.yudao.framework.common.util.object.BeanUtils;
|
|
import cn.iocoder.yudao.framework.common.util.object.BeanUtils;
|
|
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeDocumentCreateReqVO;
|
|
import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeDocumentCreateReqVO;
|
|
|
|
+import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDO;
|
|
import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDocumentDO;
|
|
import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDocumentDO;
|
|
import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeSegmentDO;
|
|
import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeSegmentDO;
|
|
|
|
+import cn.iocoder.yudao.module.ai.dal.dataobject.model.AiChatModelDO;
|
|
import cn.iocoder.yudao.module.ai.dal.mysql.knowledge.AiKnowledgeDocumentMapper;
|
|
import cn.iocoder.yudao.module.ai.dal.mysql.knowledge.AiKnowledgeDocumentMapper;
|
|
import cn.iocoder.yudao.module.ai.dal.mysql.knowledge.AiKnowledgeSegmentMapper;
|
|
import cn.iocoder.yudao.module.ai.dal.mysql.knowledge.AiKnowledgeSegmentMapper;
|
|
import cn.iocoder.yudao.module.ai.enums.knowledge.AiKnowledgeDocumentStatusEnum;
|
|
import cn.iocoder.yudao.module.ai.enums.knowledge.AiKnowledgeDocumentStatusEnum;
|
|
|
|
+import cn.iocoder.yudao.module.ai.service.model.AiApiKeyService;
|
|
|
|
+import cn.iocoder.yudao.module.ai.service.model.AiChatModelService;
|
|
import jakarta.annotation.Resource;
|
|
import jakarta.annotation.Resource;
|
|
import lombok.extern.slf4j.Slf4j;
|
|
import lombok.extern.slf4j.Slf4j;
|
|
import org.springframework.ai.document.Document;
|
|
import org.springframework.ai.document.Document;
|
|
import org.springframework.ai.reader.tika.TikaDocumentReader;
|
|
import org.springframework.ai.reader.tika.TikaDocumentReader;
|
|
import org.springframework.ai.tokenizer.TokenCountEstimator;
|
|
import org.springframework.ai.tokenizer.TokenCountEstimator;
|
|
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
|
|
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
|
|
-import org.springframework.ai.vectorstore.RedisVectorStore;
|
|
|
|
|
|
+import org.springframework.ai.vectorstore.VectorStore;
|
|
import org.springframework.core.io.ByteArrayResource;
|
|
import org.springframework.core.io.ByteArrayResource;
|
|
import org.springframework.stereotype.Service;
|
|
import org.springframework.stereotype.Service;
|
|
import org.springframework.transaction.annotation.Transactional;
|
|
import org.springframework.transaction.annotation.Transactional;
|
|
|
|
|
|
import java.util.List;
|
|
import java.util.List;
|
|
-import java.util.Objects;
|
|
|
|
|
|
|
|
/**
|
|
/**
|
|
* AI 知识库-文档 Service 实现类
|
|
* AI 知识库-文档 Service 实现类
|
|
@@ -42,9 +45,14 @@ public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentServic
|
|
@Resource
|
|
@Resource
|
|
private TokenTextSplitter tokenTextSplitter;
|
|
private TokenTextSplitter tokenTextSplitter;
|
|
@Resource
|
|
@Resource
|
|
- private TokenCountEstimator TOKEN_COUNT_ESTIMATOR;
|
|
|
|
|
|
+ private TokenCountEstimator tokenCountEstimator;
|
|
|
|
+
|
|
|
|
+ @Resource
|
|
|
|
+ private AiApiKeyService apiKeyService;
|
|
|
|
+ @Resource
|
|
|
|
+ private AiKnowledgeService knowledgeService;
|
|
@Resource
|
|
@Resource
|
|
- private RedisVectorStore vectorStore;
|
|
|
|
|
|
+ private AiChatModelService chatModelService;
|
|
|
|
|
|
|
|
|
|
// TODO 芋艿:需要 review 下,代码格式;
|
|
// TODO 芋艿:需要 review 下,代码格式;
|
|
@@ -53,18 +61,18 @@ public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentServic
|
|
public Long createKnowledgeDocument(AiKnowledgeDocumentCreateReqVO createReqVO) {
|
|
public Long createKnowledgeDocument(AiKnowledgeDocumentCreateReqVO createReqVO) {
|
|
// 1.1 下载文档
|
|
// 1.1 下载文档
|
|
String url = createReqVO.getUrl();
|
|
String url = createReqVO.getUrl();
|
|
- TikaDocumentReader loader = new TikaDocumentReader(downloadFile(url));
|
|
|
|
// 1.2 加载文档
|
|
// 1.2 加载文档
|
|
|
|
+ TikaDocumentReader loader = new TikaDocumentReader(downloadFile(url));
|
|
List<Document> documents = loader.get();
|
|
List<Document> documents = loader.get();
|
|
Document document = CollUtil.getFirst(documents);
|
|
Document document = CollUtil.getFirst(documents);
|
|
- // TODO @xin:是不是不存在,就抛出异常呀;厚泽 return 呀;
|
|
|
|
- Integer tokens = Objects.nonNull(document) ? TOKEN_COUNT_ESTIMATOR.estimate(document.getContent()) : 0;
|
|
|
|
- Integer wordCount = Objects.nonNull(document) ? document.getContent().length() : 0;
|
|
|
|
|
|
+ String content = document.getContent();
|
|
|
|
+ Integer tokens = tokenCountEstimator.estimate(content);
|
|
|
|
+ Integer wordCount = content.length();
|
|
|
|
|
|
|
|
+ // 1.3 文档记录入库
|
|
AiKnowledgeDocumentDO documentDO = BeanUtils.toBean(createReqVO, AiKnowledgeDocumentDO.class)
|
|
AiKnowledgeDocumentDO documentDO = BeanUtils.toBean(createReqVO, AiKnowledgeDocumentDO.class)
|
|
.setTokens(tokens).setWordCount(wordCount)
|
|
.setTokens(tokens).setWordCount(wordCount)
|
|
.setStatus(CommonStatusEnum.ENABLE.getStatus()).setSliceStatus(AiKnowledgeDocumentStatusEnum.SUCCESS.getStatus());
|
|
.setStatus(CommonStatusEnum.ENABLE.getStatus()).setSliceStatus(AiKnowledgeDocumentStatusEnum.SUCCESS.getStatus());
|
|
- // 1.2 文档记录入库
|
|
|
|
documentMapper.insert(documentDO);
|
|
documentMapper.insert(documentDO);
|
|
Long documentId = documentDO.getId();
|
|
Long documentId = documentDO.getId();
|
|
if (CollUtil.isEmpty(documents)) {
|
|
if (CollUtil.isEmpty(documents)) {
|
|
@@ -75,11 +83,16 @@ public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentServic
|
|
List<Document> segments = tokenTextSplitter.apply(documents);
|
|
List<Document> segments = tokenTextSplitter.apply(documents);
|
|
// 2.2 分段内容入库
|
|
// 2.2 分段内容入库
|
|
List<AiKnowledgeSegmentDO> segmentDOList = CollectionUtils.convertList(segments,
|
|
List<AiKnowledgeSegmentDO> segmentDOList = CollectionUtils.convertList(segments,
|
|
- segment -> new AiKnowledgeSegmentDO().setContent(segment.getContent()).setDocumentId(documentId)
|
|
|
|
- .setTokens(TOKEN_COUNT_ESTIMATOR.estimate(segment.getContent())).setWordCount(segment.getContent().length())
|
|
|
|
|
|
+ segment -> new AiKnowledgeSegmentDO().setContent(segment.getContent()).setDocumentId(documentId).setKnowledgeId(createReqVO.getKnowledgeId())
|
|
|
|
+ .setTokens(tokenCountEstimator.estimate(segment.getContent())).setWordCount(segment.getContent().length())
|
|
.setStatus(CommonStatusEnum.ENABLE.getStatus()));
|
|
.setStatus(CommonStatusEnum.ENABLE.getStatus()));
|
|
segmentMapper.insertBatch(segmentDOList);
|
|
segmentMapper.insertBatch(segmentDOList);
|
|
- // 3 向量化并存储
|
|
|
|
|
|
+
|
|
|
|
+ AiKnowledgeDO knowledge = knowledgeService.validateKnowledgeExists(createReqVO.getKnowledgeId());
|
|
|
|
+ AiChatModelDO model = chatModelService.validateChatModel(knowledge.getModelId());
|
|
|
|
+ // 3.1 获取向量存储实例
|
|
|
|
+ VectorStore vectorStore = apiKeyService.getOrCreateVectorStore(model.getKeyId());
|
|
|
|
+ // 3.2 向量化并存储
|
|
vectorStore.add(segments);
|
|
vectorStore.add(segments);
|
|
return documentId;
|
|
return documentId;
|
|
}
|
|
}
|