Преглед на файлове

【解决todo】AI 知识库

xiaoxin преди 9 месеца
родител
ревизия
ed2296e4c7
променени са 10 файла, в които са добавени 39 реда и са изтрити 105 реда
  1. 2 2
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/AiKnowledgeController.java
  2. 2 3
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeDO.java
  3. 1 1
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeDocumentDO.java
  4. 2 2
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/mysql/knowledge/AiKnowledgeBaseMapper.java
  5. 0 27
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiEmbeddingService.java
  6. 0 35
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiEmbeddingServiceImpl.java
  7. 11 13
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeDocumentServiceImpl.java
  8. 1 1
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeService.java
  9. 11 20
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeServiceImpl.java
  10. 9 1
      yudao-module-ai/yudao-spring-boot-starter-ai/src/main/java/cn/iocoder/yudao/framework/ai/config/YudaoAiAutoConfiguration.java

+ 2 - 2
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/AiKnowledgeController.java

@@ -3,7 +3,7 @@ package cn.iocoder.yudao.module.ai.controller.admin.knowledge;
 import cn.iocoder.yudao.framework.common.pojo.CommonResult;
 import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeCreateMyReqVO;
 import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeUpdateMyReqVO;
-import cn.iocoder.yudao.module.ai.service.knowledge.AiKnowledgeBaseService;
+import cn.iocoder.yudao.module.ai.service.knowledge.AiKnowledgeService;
 import io.swagger.v3.oas.annotations.Operation;
 import io.swagger.v3.oas.annotations.tags.Tag;
 import jakarta.annotation.Resource;
@@ -19,7 +19,7 @@ import static cn.iocoder.yudao.framework.security.core.util.SecurityFrameworkUti
 public class AiKnowledgeController {
 
     @Resource
-    private AiKnowledgeBaseService knowledgeBaseService;
+    private AiKnowledgeService knowledgeBaseService;
 
     @PostMapping("/create-my")
     @Operation(summary = "创建【我的】知识库")

+ 2 - 3
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeBaseDO.java → yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeDO.java

@@ -10,15 +10,14 @@ import lombok.Data;
 
 import java.util.List;
 
-// TODO @xin:要不把 AiKnowledgeBaseDO 改成 AiKnowledgeDO。感觉 base 后缀,感觉有点奇怪(让人以为是基类)。然后,我们很多地方的外键编号,都是 knowledgeId
 /**
  * AI 知识库 DO
  *
  * @author xiaoxin
  */
-@TableName(value = "ai_knowledge_base", autoResultMap = true)
+@TableName(value = "ai_knowledge", autoResultMap = true)
 @Data
-public class AiKnowledgeBaseDO extends BaseDO {
+public class AiKnowledgeDO extends BaseDO {
 
     /**
      * 编号

+ 1 - 1
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeDocumentDO.java

@@ -24,7 +24,7 @@ public class AiKnowledgeDocumentDO extends BaseDO {
     /**
      * 知识库编号
      *
-     * 关联 {@link AiKnowledgeBaseDO#getId()}
+     * 关联 {@link AiKnowledgeDO#getId()}
      */
     private Long knowledgeId;
     /**

+ 2 - 2
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/mysql/knowledge/AiKnowledgeBaseMapper.java

@@ -1,7 +1,7 @@
 package cn.iocoder.yudao.module.ai.dal.mysql.knowledge;
 
 import cn.iocoder.yudao.framework.mybatis.core.mapper.BaseMapperX;
-import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeBaseDO;
+import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDO;
 import org.apache.ibatis.annotations.Mapper;
 
 /**
@@ -10,5 +10,5 @@ import org.apache.ibatis.annotations.Mapper;
  * @author xiaoxin
  */
 @Mapper
-public interface AiKnowledgeBaseMapper extends BaseMapperX<AiKnowledgeBaseDO> {
+public interface AiKnowledgeBaseMapper extends BaseMapperX<AiKnowledgeDO> {
 }

+ 0 - 27
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiEmbeddingService.java

@@ -1,27 +0,0 @@
-package cn.iocoder.yudao.module.ai.service.knowledge;
-
-import org.springframework.ai.document.Document;
-import org.springframework.ai.vectorstore.SearchRequest;
-
-import java.util.List;
-
-/**
- * AI 嵌入 Service 接口
- *
- * @author xiaoxin
- */
-public interface AiEmbeddingService {
-
-    /**
-     * 向量化文档并存储
-     */
-    void add(List<Document> documents);
-
-    /**
-     * 相似查询
-     *
-     * @param request 查询实体
-     */
-    List<Document> similaritySearch(SearchRequest request);
-
-}

+ 0 - 35
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiEmbeddingServiceImpl.java

@@ -1,35 +0,0 @@
-package cn.iocoder.yudao.module.ai.service.knowledge;
-
-import jakarta.annotation.Resource;
-import org.springframework.ai.document.Document;
-import org.springframework.ai.vectorstore.RedisVectorStore;
-import org.springframework.ai.vectorstore.SearchRequest;
-import org.springframework.stereotype.Service;
-
-import java.util.List;
-
-// TODO @xin:是不是不用 AiEmbeddingServiceImpl,直接 vectorStore 注入到需要的地方就好啦。通过 KnowledgeDocumentService 返回就好。
-/**
- * AI 嵌入 Service 实现类
- *
- * @author xiaoxin
- */
-@Service
-public class AiEmbeddingServiceImpl implements AiEmbeddingService {
-
-    @Resource
-    private RedisVectorStore vectorStore;
-
-    @Override
-//    @Async
-    // TODO xiaoxin 报错先注释
-    public void add(List<Document> documents) {
-        vectorStore.add(documents);
-    }
-
-    @Override
-    public List<Document> similaritySearch(SearchRequest request) {
-        return vectorStore.similaritySearch(request);
-    }
-
-}

+ 11 - 13
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeDocumentServiceImpl.java

@@ -14,8 +14,9 @@ import jakarta.annotation.Resource;
 import lombok.extern.slf4j.Slf4j;
 import org.springframework.ai.document.Document;
 import org.springframework.ai.reader.tika.TikaDocumentReader;
-import org.springframework.ai.tokenizer.JTokkitTokenCountEstimator;
+import org.springframework.ai.tokenizer.TokenCountEstimator;
 import org.springframework.ai.transformer.splitter.TokenTextSplitter;
+import org.springframework.ai.vectorstore.RedisVectorStore;
 import org.springframework.beans.factory.annotation.Value;
 import org.springframework.stereotype.Service;
 import org.springframework.transaction.annotation.Transactional;
@@ -39,52 +40,49 @@ public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentServic
 
     @Resource
     private TokenTextSplitter tokenTextSplitter;
-
     @Resource
-    private AiEmbeddingService embeddingService;
+    private TokenCountEstimator TOKEN_COUNT_ESTIMATOR;
+    @Resource
+    private RedisVectorStore vectorStore;
 
-    // TODO @xin:@Resource 注入
-    private static final JTokkitTokenCountEstimator TOKEN_COUNT_ESTIMATOR = new JTokkitTokenCountEstimator();
 
     // TODO xiaoxin 临时测试用,后续删
     @Value("classpath:/webapp/test/Fel.pdf")
     private org.springframework.core.io.Resource data;
 
     // TODO 芋艿:需要 review 下,代码格式;
-    // TODO @xin:最好有 1、/2、/3 这种,让代码更有层次感
     @Override
     @Transactional(rollbackFor = Exception.class)
     public Long createKnowledgeDocument(AiKnowledgeDocumentCreateReqVO createReqVO) {
         // TODO xiaoxin 后续从 url 加载
         TikaDocumentReader loader = new TikaDocumentReader(data);
-        // 加载文档
+        // 1.1 加载文档
         List<Document> documents = loader.get();
         Document document = CollUtil.getFirst(documents);
         // TODO @xin:是不是不存在,就抛出异常呀;或者 return 呀;
-        // TODO 芋艿 文档层面有没有可能会比较大,这两个字段是否可以从分段表计算得出?回复:先直接算;
         Integer tokens = Objects.nonNull(document) ? TOKEN_COUNT_ESTIMATOR.estimate(document.getContent()) : 0;
         Integer wordCount = Objects.nonNull(document) ? document.getContent().length() : 0;
 
         AiKnowledgeDocumentDO documentDO = BeanUtils.toBean(createReqVO, AiKnowledgeDocumentDO.class)
                 .setTokens(tokens).setWordCount(wordCount)
                 .setStatus(CommonStatusEnum.ENABLE.getStatus()).setSliceStatus(AiKnowledgeDocumentStatusEnum.SUCCESS.getStatus());
-        // 文档记录入库
+        // 1.2 文档记录入库
         documentMapper.insert(documentDO);
         Long documentId = documentDO.getId();
         if (CollUtil.isEmpty(documents)) {
             return documentId;
         }
 
-        // 文档分段
+        // 2.1 文档分段
         List<Document> segments = tokenTextSplitter.apply(documents);
-        // 分段内容入库
+        // 2.2 分段内容入库
         List<AiKnowledgeSegmentDO> segmentDOList = CollectionUtils.convertList(segments,
                 segment -> new AiKnowledgeSegmentDO().setContent(segment.getContent()).setDocumentId(documentId)
                         .setTokens(TOKEN_COUNT_ESTIMATOR.estimate(segment.getContent())).setWordCount(segment.getContent().length())
                         .setStatus(CommonStatusEnum.ENABLE.getStatus()));
         segmentMapper.insertBatch(segmentDOList);
-        // 向量化并存储
-        embeddingService.add(segments);
+        // 3 向量化并存储
+        vectorStore.add(segments);
         return documentId;
     }
 

+ 1 - 1
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeBaseService.java → yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeService.java

@@ -7,7 +7,7 @@ import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeUpdat
  *
  * @author xiaoxin
  */
-public interface AiKnowledgeBaseService {
+public interface AiKnowledgeService {
 
     /**
      * 创建【我的】知识库

+ 11 - 20
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeBaseServiceImpl.java → yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeServiceImpl.java

@@ -1,12 +1,11 @@
 package cn.iocoder.yudao.module.ai.service.knowledge;
 
-import cn.hutool.core.lang.Assert;
 import cn.hutool.core.util.ObjUtil;
 import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
 import cn.iocoder.yudao.framework.common.util.object.BeanUtils;
 import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeCreateMyReqVO;
 import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.AiKnowledgeUpdateMyReqVO;
-import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeBaseDO;
+import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDO;
 import cn.iocoder.yudao.module.ai.dal.dataobject.model.AiChatModelDO;
 import cn.iocoder.yudao.module.ai.dal.mysql.knowledge.AiKnowledgeBaseMapper;
 import cn.iocoder.yudao.module.ai.service.model.AiChatModelService;
@@ -24,7 +23,7 @@ import static cn.iocoder.yudao.module.ai.enums.ErrorCodeConstants.KNOWLEDGE_NOT_
  */
 @Service
 @Slf4j
-public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService {
+public class AiKnowledgeServiceImpl implements AiKnowledgeService {
 
     @Resource
     private AiChatModelService chatModalService;
@@ -34,42 +33,34 @@ public class AiKnowledgeBaseServiceImpl implements AiKnowledgeBaseService {
 
     @Override
     public Long createKnowledgeMy(AiKnowledgeCreateMyReqVO createReqVO, Long userId) {
-        // TODO @xin:貌似直接调用 chatModalService.validateChatModel(id) 完事,不用搞个方法
         // 1. 校验模型配置
-        AiChatModelDO model = validateChatModel(createReqVO.getModelId());
+        AiChatModelDO model = chatModalService.validateChatModel(createReqVO.getModelId());
 
         // 2. 插入知识库
-        // TODO @xin:不用 DO 结尾
-        AiKnowledgeBaseDO knowledgeBaseDO = BeanUtils.toBean(createReqVO, AiKnowledgeBaseDO.class)
+        AiKnowledgeDO knowledgeBase = BeanUtils.toBean(createReqVO, AiKnowledgeDO.class)
                 .setModel(model.getModel()).setUserId(userId).setStatus(CommonStatusEnum.ENABLE.getStatus());
-        knowledgeBaseMapper.insert(knowledgeBaseDO);
-        return knowledgeBaseDO.getId();
+        knowledgeBaseMapper.insert(knowledgeBase);
+        return knowledgeBase.getId();
     }
 
     @Override
     public void updateKnowledgeMy(AiKnowledgeUpdateMyReqVO updateReqVO, Long userId) {
         // 1.1 校验知识库存在
-        AiKnowledgeBaseDO knowledgeBaseDO = validateKnowledgeExists(updateReqVO.getId());
+        AiKnowledgeDO knowledgeBaseDO = validateKnowledgeExists(updateReqVO.getId());
         if (ObjUtil.notEqual(knowledgeBaseDO.getUserId(), userId)) {
             throw exception(KNOWLEDGE_NOT_EXISTS);
         }
         // 1.2 校验模型配置
-        AiChatModelDO model = validateChatModel(updateReqVO.getModelId());
+        AiChatModelDO model = chatModalService.validateChatModel(updateReqVO.getModelId());
 
         // 2. 更新知识库
-        AiKnowledgeBaseDO updateDO = BeanUtils.toBean(updateReqVO, AiKnowledgeBaseDO.class);
+        AiKnowledgeDO updateDO = BeanUtils.toBean(updateReqVO, AiKnowledgeDO.class);
         updateDO.setModel(model.getModel());
         knowledgeBaseMapper.updateById(updateDO);
     }
 
-    private AiChatModelDO validateChatModel(Long id) {
-        AiChatModelDO model = chatModalService.validateChatModel(id);
-        Assert.notNull(model, "未找到对应嵌入模型");
-        return model;
-    }
-
-    public AiKnowledgeBaseDO validateKnowledgeExists(Long id) {
-        AiKnowledgeBaseDO knowledgeBase = knowledgeBaseMapper.selectById(id);
+    public AiKnowledgeDO validateKnowledgeExists(Long id) {
+        AiKnowledgeDO knowledgeBase = knowledgeBaseMapper.selectById(id);
         if (knowledgeBase == null) {
             throw exception(KNOWLEDGE_NOT_EXISTS);
         }

+ 9 - 1
yudao-module-ai/yudao-spring-boot-starter-ai/src/main/java/cn/iocoder/yudao/framework/ai/config/YudaoAiAutoConfiguration.java

@@ -13,6 +13,8 @@ import lombok.extern.slf4j.Slf4j;
 import org.springframework.ai.autoconfigure.vectorstore.redis.RedisVectorStoreProperties;
 import org.springframework.ai.document.MetadataMode;
 import org.springframework.ai.embedding.EmbeddingModel;
+import org.springframework.ai.tokenizer.JTokkitTokenCountEstimator;
+import org.springframework.ai.tokenizer.TokenCountEstimator;
 import org.springframework.ai.transformer.splitter.TokenTextSplitter;
 import org.springframework.ai.transformers.TransformersEmbeddingModel;
 import org.springframework.ai.vectorstore.RedisVectorStore;
@@ -90,7 +92,7 @@ public class YudaoAiAutoConfiguration {
     }
 
     /**
-     * 我们启动有加载很多 Embedding 模型,不晓得取哪个好,先 new 个 TransformersEmbeddingModel 跑
+     * TODO @xin 抽离出去,根据具体模型走
      */
     @Bean
     @Lazy // TODO 芋艿:临时注释,避免无法启动
@@ -114,4 +116,10 @@ public class YudaoAiAutoConfiguration {
         return new TokenTextSplitter(500, 100, 5, 10000, true);
     }
 
+    @Bean
+    @Lazy // TODO 芋艿:临时注释,避免无法启动
+    public TokenCountEstimator tokenCountEstimator() {
+        return new JTokkitTokenCountEstimator();
+    }
+
 }