فهرست منبع

【代码评审】AI 大模型:知识库的逻辑

YunaiV 9 ماه پیش
والد
کامیت
8e56b81a3a
14 فایلهای تغییر یافته به همراه 25 افزوده شده و 17 حذف شده
  1. 1 0
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/vo/knowledge/AiKnowledgeCreateMyReqVO.java
  2. 1 0
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/vo/knowledge/AiKnowledgeDocumentCreateReqVO.java
  3. 1 2
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeDO.java
  4. 2 0
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeDocumentDO.java
  5. 1 1
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeSegmentDO.java
  6. 2 0
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/mysql/knowledge/AiKnowledgeSegmentMapper.java
  7. 1 1
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeDocumentServiceImpl.java
  8. 1 2
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeSegmentService.java
  9. 8 8
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeSegmentServiceImpl.java
  10. 2 2
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeService.java
  11. 2 0
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeServiceImpl.java
  12. 1 0
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/model/AiApiKeyServiceImpl.java
  13. 1 1
      yudao-module-ai/yudao-spring-boot-starter-ai/pom.xml
  14. 1 0
      yudao-module-ai/yudao-spring-boot-starter-ai/src/main/java/cn/iocoder/yudao/framework/ai/core/factory/AiModelFactoryImpl.java

+ 1 - 0
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/vo/knowledge/AiKnowledgeCreateMyReqVO.java

@@ -32,4 +32,5 @@ public class AiKnowledgeCreateMyReqVO {
     @Schema(description = "topK", requiredMode = Schema.RequiredMode.REQUIRED, example = "3")
     @NotNull(message = "topK 不能为空")
     private Integer topK;
+
 }

+ 1 - 0
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/vo/knowledge/AiKnowledgeDocumentCreateReqVO.java

@@ -42,4 +42,5 @@ public class AiKnowledgeDocumentCreateReqVO {
     @Schema(description = "分块是否保留分隔符", requiredMode = Schema.RequiredMode.REQUIRED, example = "true")
     @NotNull(message = "分块是否保留分隔符不能为空")
     private Boolean keepSeparator;
+
 }

+ 1 - 2
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeDO.java

@@ -57,17 +57,16 @@ public class AiKnowledgeDO extends BaseDO {
      * topK
      */
     private Integer topK;
-
     /**
      * 相似度阈值
      */
     private Double similarityThreshold;
 
-
     /**
      * 状态
      * <p>
      * 枚举 {@link CommonStatusEnum}
      */
     private Integer status;
+
 }

+ 2 - 0
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeDocumentDO.java

@@ -47,10 +47,12 @@ public class AiKnowledgeDocumentDO extends BaseDO {
      * 字符数
      */
     private Integer wordCount;
+    // TODO @新:chunk 1)是不是 segment,这样命名保持一致会好点哈?2)Size 是不是改成 Tokens 会统一点;3)defaultChunkSize、defaultChunkSize、minChunkSizeChars、maxNumChunks 这几个字段的命名,可能要微信一起讨论下。尽量命名保持风格统一哈。
     /**
      * 每个文本块的目标 token 数
      */
     private Integer defaultChunkSize;
+    // TODO @xin:SizeChars 和 wordCount 好像是一个意思,是不是也要统一哈。
     /**
      * 每个文本块的最小字符数
      */

+ 1 - 1
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeSegmentDO.java

@@ -27,7 +27,7 @@ public class AiKnowledgeSegmentDO extends BaseDO {
     /**
      * 向量库的编号
      */
-    @TableField(updateStrategy = FieldStrategy.ALWAYS)
+    @TableField(updateStrategy = FieldStrategy.ALWAYS) // TODO @新:尽量规避要这个注解。万一后面加个 status 单独更新,可能会踩坑。
     private String vectorId;
     /**
      * 知识库编号

+ 2 - 0
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/mysql/knowledge/AiKnowledgeSegmentMapper.java

@@ -25,9 +25,11 @@ public interface AiKnowledgeSegmentMapper extends BaseMapperX<AiKnowledgeSegment
                 .orderByDesc(AiKnowledgeSegmentDO::getId));
     }
 
+    // TODO @新:selectListByXXX 哈
     default List<AiKnowledgeSegmentDO> selectList(List<String> vectorIdList) {
         return selectList(new LambdaQueryWrapperX<AiKnowledgeSegmentDO>()
                 .in(AiKnowledgeSegmentDO::getVectorId, vectorIdList)
                 .orderByDesc(AiKnowledgeSegmentDO::getId));
     }
+
 }

+ 1 - 1
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeDocumentServiceImpl.java

@@ -83,7 +83,7 @@ public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentServic
                         .setStatus(CommonStatusEnum.ENABLE.getStatus()));
         segmentMapper.insertBatch(segmentDOList);
 
-        // 3.2 向量化并存储
+        // 3. 向量化并存储
         segments.forEach(segment -> segment.getMetadata().put(AiKnowledgeSegmentDO.FIELD_KNOWLEDGE_ID, createReqVO.getKnowledgeId()));
         vectorStore.add(segments);
         return documentId;

+ 1 - 2
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeSegmentService.java

@@ -38,9 +38,8 @@ public interface AiKnowledgeSegmentService {
      */
     void updateKnowledgeSegmentStatus(AiKnowledgeSegmentUpdateStatusReqVO reqVO);
 
-
     /**
-     * 段落召回
+     * 召回段落
      *
      * @param reqVO 召回请求信息
      * @return 召回的段落

+ 8 - 8
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeSegmentServiceImpl.java

@@ -55,19 +55,19 @@ public class AiKnowledgeSegmentServiceImpl implements AiKnowledgeSegmentService
 
     @Override
     public void updateKnowledgeSegment(AiKnowledgeSegmentUpdateReqVO reqVO) {
-        // 0 校验
+        // 1. 校验
         AiKnowledgeSegmentDO oldKnowledgeSegment = validateKnowledgeSegmentExists(reqVO.getId());
+
         // 2.1 获取知识库向量实例
         VectorStore vectorStore = knowledgeService.getVectorStoreById(oldKnowledgeSegment.getKnowledgeId());
         // 2.2 删除原向量
         vectorStore.delete(List.of(oldKnowledgeSegment.getVectorId()));
-
         // 2.3 重新向量化
         Document document = new Document(reqVO.getContent());
         document.getMetadata().put(AiKnowledgeSegmentDO.FIELD_KNOWLEDGE_ID, oldKnowledgeSegment.getKnowledgeId());
         vectorStore.add(List.of(document));
 
-        // 2.1 更新段落内容
+        // 3. 更新段落内容
         AiKnowledgeSegmentDO knowledgeSegment = BeanUtils.toBean(reqVO, AiKnowledgeSegmentDO.class);
         knowledgeSegment.setVectorId(document.getId());
         segmentMapper.updateById(knowledgeSegment);
@@ -98,14 +98,14 @@ public class AiKnowledgeSegmentServiceImpl implements AiKnowledgeSegmentService
 
     @Override
     public List<AiKnowledgeSegmentDO> similaritySearch(AiKnowledgeSegmentSearchReqVO reqVO) {
-        // 0. 校验
+        // 1. 校验
         AiKnowledgeDO knowledge = knowledgeService.validateKnowledgeExists(reqVO.getKnowledgeId());
         AiChatModelDO model = chatModelService.validateChatModel(knowledge.getModelId());
 
-        // 1.1 获取向量存储实例
+        // 2. 获取向量存储实例
         VectorStore vectorStore = apiKeyService.getOrCreateVectorStore(model.getKeyId());
 
-        // 1.2 向量检索
+        // 3.1 向量检索
         List<Document> documentList = vectorStore.similaritySearch(SearchRequest.query(reqVO.getContent())
                 .withTopK(knowledge.getTopK())
                 .withSimilarityThreshold(knowledge.getSimilarityThreshold())
@@ -113,11 +113,10 @@ public class AiKnowledgeSegmentServiceImpl implements AiKnowledgeSegmentService
         if (CollUtil.isEmpty(documentList)) {
             return ListUtil.empty();
         }
-        // 2.1 段落召回
+        // 3.2 段落召回
         return segmentMapper.selectList(CollUtil.getFieldValues(documentList, "id", String.class));
     }
 
-
     /**
      * 校验段落是否存在
      *
@@ -131,4 +130,5 @@ public class AiKnowledgeSegmentServiceImpl implements AiKnowledgeSegmentService
         }
         return knowledgeSegment;
     }
+
 }

+ 2 - 2
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeService.java

@@ -23,7 +23,6 @@ public interface AiKnowledgeService {
      */
     Long createKnowledgeMy(AiKnowledgeCreateMyReqVO createReqVO, Long userId);
 
-
     /**
      * 更新【我的】知识库
      *
@@ -32,7 +31,6 @@ public interface AiKnowledgeService {
      */
     void updateKnowledgeMy(AiKnowledgeUpdateMyReqVO updateReqVO, Long userId);
 
-
     /**
      * 校验知识库是否存在
      *
@@ -49,6 +47,7 @@ public interface AiKnowledgeService {
      */
     PageResult<AiKnowledgeDO> getKnowledgePageMy(Long userId, PageParam pageReqVO);
 
+    // TODO @新:knowledgeId 和 validateKnowledgeExists 的 id 是同一个么?如果是的话,建议变量也用 id 哈,然后两边的 id 注释,保持一致
     /**
      * 根据知识库编号获取向量存储实例
      *
@@ -56,4 +55,5 @@ public interface AiKnowledgeService {
      * @return 向量存储实例
      */
     VectorStore getVectorStoreById(Long knowledgeId);
+
 }

+ 2 - 0
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeServiceImpl.java

@@ -38,6 +38,7 @@ public class AiKnowledgeServiceImpl implements AiKnowledgeService {
     private AiChatModelService chatModelService;
     @Resource
     private AiApiKeyService apiKeyService;
+    // TODO @新:chatModelService 和 apiKeyService 可以放到 33 行的 chatModalService 后面。尽量保持,相同类型的变量在一块。例如说,Service 一块,Mapper 一块。
 
     @Override
     public Long createKnowledgeMy(AiKnowledgeCreateMyReqVO createReqVO, Long userId) {
@@ -85,6 +86,7 @@ public class AiKnowledgeServiceImpl implements AiKnowledgeService {
     public VectorStore getVectorStoreById(Long knowledgeId) {
         AiKnowledgeDO knowledge = validateKnowledgeExists(knowledgeId);
         AiChatModelDO model = chatModelService.validateChatModel(knowledge.getModelId());
+        // 创建或获取 VectorStore 对象
         return apiKeyService.getOrCreateVectorStore(model.getKeyId());
     }
 

+ 1 - 0
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/model/AiApiKeyServiceImpl.java

@@ -146,6 +146,7 @@ public class AiApiKeyServiceImpl implements AiApiKeyService {
     public VectorStore getOrCreateVectorStore(Long id) {
         AiApiKeyDO apiKey = validateApiKey(id);
         AiPlatformEnum platform = AiPlatformEnum.validatePlatform(apiKey.getPlatform());
+        // 创建或获取 VectorStore 对象
         return modelFactory.getOrCreateVectorStore(getEmbeddingModel(id), platform, apiKey.getApiKey(), apiKey.getUrl());
     }
 

+ 1 - 1
yudao-module-ai/yudao-spring-boot-starter-ai/pom.xml

@@ -47,7 +47,7 @@
 
         <!-- 向量化,基于 Redis 存储,Tika 解析内容 -->
 
-        <!-- 暂不做经济型,先注释 -->
+        <!-- 暂不做经济型,先注释 TODO 经济型是啥呀? -->
         <!--        <dependency>-->
         <!--            <groupId>${spring-ai.groupId}</groupId>-->
         <!--            <artifactId>spring-ai-transformers-spring-boot-starter</artifactId>-->

+ 1 - 0
yudao-module-ai/yudao-spring-boot-starter-ai/src/main/java/cn/iocoder/yudao/framework/ai/core/factory/AiModelFactoryImpl.java

@@ -197,6 +197,7 @@ public class AiModelFactoryImpl implements AiModelFactory {
         });
     }
 
+    // TODO @新:貌似可以创建一个大的 VectorStore。然后搜的时候,通过 Filter.Expression 过滤对应的数据。
     @Override
     public VectorStore getOrCreateVectorStore(EmbeddingModel embeddingModel, AiPlatformEnum platform, String apiKey, String url) {
         String cacheKey = buildClientCacheKey(VectorStore.class, platform, apiKey, url);