Browse Source

【解决 TODO】AI 知识库:字段命名统一,补充注释

xiaoxin 10 months ago
parent
commit
5cd870748d
11 changed files with 38 additions and 41 deletions
  1. 1 1
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/AiKnowledgeSegmentController.java
  2. 10 10
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/vo/knowledge/AiKnowledgeDocumentCreateReqVO.java
  3. 4 2
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeDO.java
  4. 11 7
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeDocumentDO.java
  5. 0 3
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeSegmentDO.java
  6. 1 2
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/mysql/knowledge/AiKnowledgeSegmentMapper.java
  7. 2 2
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeDocumentServiceImpl.java
  8. 2 2
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeSegmentServiceImpl.java
  9. 2 3
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeService.java
  10. 5 8
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeServiceImpl.java
  11. 0 1
      yudao-module-ai/yudao-spring-boot-starter-ai/src/main/java/cn/iocoder/yudao/framework/ai/core/factory/AiModelFactoryImpl.java

+ 1 - 1
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/AiKnowledgeSegmentController.java

@@ -29,7 +29,7 @@ public class AiKnowledgeSegmentController {
 
     @GetMapping("/page")
     @Operation(summary = "获取段落分页")
-    public CommonResult<PageResult<AiKnowledgeSegmentRespVO>> getKnowledgeSegmentPageMy(@Valid AiKnowledgeSegmentPageReqVO pageReqVO) {
+    public CommonResult<PageResult<AiKnowledgeSegmentRespVO>> getKnowledgeSegmentPage(@Valid AiKnowledgeSegmentPageReqVO pageReqVO) {
         PageResult<AiKnowledgeSegmentDO> pageResult = segmentService.getKnowledgeSegmentPage(pageReqVO);
         return success(BeanUtils.toBean(pageResult, AiKnowledgeSegmentRespVO.class));
     }

+ 10 - 10
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/vo/knowledge/AiKnowledgeDocumentCreateReqVO.java

@@ -23,21 +23,21 @@ public class AiKnowledgeDocumentCreateReqVO {
     @URL(message = "文档 URL 格式不正确")
     private String url;
 
-    @Schema(description = "每个文本块的目标 token 数", requiredMode = Schema.RequiredMode.REQUIRED, example = "800")
-    @NotNull(message = "每个文本块的目标 token 数不能为空")
-    private Integer defaultChunkSize;
+    @Schema(description = "每个段落的目标 token 数", requiredMode = Schema.RequiredMode.REQUIRED, example = "800")
+    @NotNull(message = "每个段落的目标 token 数不能为空")
+    private Integer defaultSegmentTokens;
 
-    @Schema(description = "每个文本块的最小字符数", requiredMode = Schema.RequiredMode.REQUIRED, example = "350")
-    @NotNull(message = "每个文本块的最小字符数不能为空")
-    private Integer minChunkSizeChars;
+    @Schema(description = "每个段落的最小字符数", requiredMode = Schema.RequiredMode.REQUIRED, example = "350")
+    @NotNull(message = "每个段落的最小字符数不能为空")
+    private Integer minSegmentWordCount;
 
-    @Schema(description = "丢弃阈值", requiredMode = Schema.RequiredMode.REQUIRED, example = "5")
+    @Schema(description = "丢弃阈值:低于此阈值的段落会被丢弃", requiredMode = Schema.RequiredMode.REQUIRED, example = "5")
     @NotNull(message = "丢弃阈值不能为空")
     private Integer minChunkLengthToEmbed;
 
-    @Schema(description = "最大数", requiredMode = Schema.RequiredMode.REQUIRED, example = "10000")
-    @NotNull(message = "最大数不能为空")
-    private Integer maxNumChunks;
+    @Schema(description = "最大段落数", requiredMode = Schema.RequiredMode.REQUIRED, example = "10000")
+    @NotNull(message = "最大段落数不能为空")
+    private Integer maxNumSegments;
 
     @Schema(description = "分块是否保留分隔符", requiredMode = Schema.RequiredMode.REQUIRED, example = "true")
     @NotNull(message = "分块是否保留分隔符不能为空")

+ 4 - 2
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeDO.java

@@ -38,9 +38,11 @@ public class AiKnowledgeDO extends BaseDO {
      * 知识库描述
      */
     private String description;
-    // TODO @新:如果全部可见,需要怎么设置?
+
     /**
-     * 可见权限,只能选择哪些人可见
+     * 可见权限,选择哪些人可见
+     * <p>
+     * -1 所有人可见,其他为各自用户编号
      */
     @TableField(typeHandler = JacksonTypeHandler.class)
     private List<Long> visibilityPermissions;

+ 11 - 7
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeDocumentDO.java

@@ -40,23 +40,25 @@ public class AiKnowledgeDocumentDO extends BaseDO {
      */
     private String url;
     /**
-     * token 数量
+     * 文档 token 数量
      */
     private Integer tokens;
     /**
-     * 字符数
+     * 文档字符数
      */
     private Integer wordCount;
-    // TODO @新:chunk 1)是不是 segment,这样命名保持一致会好点哈?2)Size 是不是改成 Tokens 会统一点;3)defaultChunkSize、defaultChunkSize、minChunkSizeChars、maxNumChunks 这几个字段的命名,可能要微信一起讨论下。尽量命名保持风格统一哈。
+
+
+    // ========== 自定义分段所用参数 ==========
+    // TODO @新:3)defaultChunkSize、minChunkSizeChars、maxNumChunks 这几个字段的命名,可能要微信一起讨论下。尽量命名保持风格统一哈。
     /**
      * 每个文本块的目标 token 数
      */
-    private Integer defaultChunkSize;
-    // TODO @xin:SizeChars 和 wordCount 好像是一个意思,是不是也要统一哈。
+    private Integer defaultSegmentTokens;
     /**
      * 每个文本块的最小字符数
      */
-    private Integer minChunkSizeChars;
+    private Integer minSegmentWordCount;
     /**
      * 低于此值的块会被丢弃
      */
@@ -64,11 +66,13 @@ public class AiKnowledgeDocumentDO extends BaseDO {
     /**
      * 最大块数
      */
-    private Integer maxNumChunks;
+    private Integer maxNumSegments;
     /**
      * 分块是否保留分隔符
      */
     private Boolean keepSeparator;
+    // ===================================
+
     /**
      * 切片状态
      * <p>

+ 0 - 3
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeSegmentDO.java

@@ -2,8 +2,6 @@ package cn.iocoder.yudao.module.ai.dal.dataobject.knowledge;
 
 import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
 import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO;
-import com.baomidou.mybatisplus.annotation.FieldStrategy;
-import com.baomidou.mybatisplus.annotation.TableField;
 import com.baomidou.mybatisplus.annotation.TableId;
 import com.baomidou.mybatisplus.annotation.TableName;
 import lombok.Data;
@@ -27,7 +25,6 @@ public class AiKnowledgeSegmentDO extends BaseDO {
     /**
      * 向量库的编号
      */
-    @TableField(updateStrategy = FieldStrategy.ALWAYS) // TODO @新:尽量规避要这个注解。万一后面加个 status 单独更新,可能会踩坑。
     private String vectorId;
     /**
      * 知识库编号

+ 1 - 2
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/mysql/knowledge/AiKnowledgeSegmentMapper.java

@@ -25,8 +25,7 @@ public interface AiKnowledgeSegmentMapper extends BaseMapperX<AiKnowledgeSegment
                 .orderByDesc(AiKnowledgeSegmentDO::getId));
     }
 
-    // TODO @新:selectListByXXX 哈
-    default List<AiKnowledgeSegmentDO> selectList(List<String> vectorIdList) {
+    default List<AiKnowledgeSegmentDO> selectListByVectorIds(List<String> vectorIdList) {
         return selectList(new LambdaQueryWrapperX<AiKnowledgeSegmentDO>()
                 .in(AiKnowledgeSegmentDO::getVectorId, vectorIdList)
                 .orderByDesc(AiKnowledgeSegmentDO::getId));

+ 2 - 2
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeDocumentServiceImpl.java

@@ -71,8 +71,8 @@ public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentServic
         }
 
         // 2 构造文本分段器
-        TokenTextSplitter tokenTextSplitter = new TokenTextSplitter(createReqVO.getDefaultChunkSize(), createReqVO.getMinChunkSizeChars(), createReqVO.getMinChunkLengthToEmbed(),
-                createReqVO.getMaxNumChunks(), createReqVO.getKeepSeparator());
+        TokenTextSplitter tokenTextSplitter = new TokenTextSplitter(createReqVO.getDefaultSegmentTokens(), createReqVO.getMinSegmentWordCount(), createReqVO.getMinChunkLengthToEmbed(),
+                createReqVO.getMaxNumSegments(), createReqVO.getKeepSeparator());
         // 2.1 文档分段
         List<Document> segments = tokenTextSplitter.apply(documents);
         // 2.2 分段内容入库

+ 2 - 2
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeSegmentServiceImpl.java

@@ -90,7 +90,7 @@ public class AiKnowledgeSegmentServiceImpl implements AiKnowledgeSegmentService
         } else {
             // 2.2 禁用删除向量
             vectorStore.delete(List.of(oldKnowledgeSegment.getVectorId()));
-            knowledgeSegment.setVectorId(null);
+            knowledgeSegment.setVectorId("");
         }
         // 3 更新段落状态
         segmentMapper.updateById(knowledgeSegment);
@@ -114,7 +114,7 @@ public class AiKnowledgeSegmentServiceImpl implements AiKnowledgeSegmentService
             return ListUtil.empty();
         }
         // 3.2 段落召回
-        return segmentMapper.selectList(CollUtil.getFieldValues(documentList, "id", String.class));
+        return segmentMapper.selectListByVectorIds(CollUtil.getFieldValues(documentList, "id", String.class));
     }
 
     /**

+ 2 - 3
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeService.java

@@ -47,13 +47,12 @@ public interface AiKnowledgeService {
      */
     PageResult<AiKnowledgeDO> getKnowledgePageMy(Long userId, PageParam pageReqVO);
 
-    // TODO @新:knowledgeId 和 validateKnowledgeExists 的 id 是同一个么?如果是的话,建议变量也用 id 哈,然后两边的 id 注释,保持一致
     /**
      * 根据知识库编号获取向量存储实例
      *
-     * @param knowledgeId 知识库编号
+     * @param id 知识库编号
      * @return 向量存储实例
      */
-    VectorStore getVectorStoreById(Long knowledgeId);
+    VectorStore getVectorStoreById(Long id);
 
 }

+ 5 - 8
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeServiceImpl.java

@@ -29,21 +29,18 @@ import static cn.iocoder.yudao.module.ai.enums.ErrorCodeConstants.KNOWLEDGE_NOT_
 @Slf4j
 public class AiKnowledgeServiceImpl implements AiKnowledgeService {
 
-    @Resource
-    private AiChatModelService chatModalService;
-
     @Resource
     private AiKnowledgeMapper knowledgeMapper;
+
     @Resource
     private AiChatModelService chatModelService;
     @Resource
     private AiApiKeyService apiKeyService;
-    // TODO @新:chatModelService 和 apiKeyService 可以放到 33 行的 chatModalService 后面。尽量保持,相同类型的变量在一块。例如说,Service 一块,Mapper 一块。
 
     @Override
     public Long createKnowledgeMy(AiKnowledgeCreateMyReqVO createReqVO, Long userId) {
         // 1. 校验模型配置
-        AiChatModelDO model = chatModalService.validateChatModel(createReqVO.getModelId());
+        AiChatModelDO model = chatModelService.validateChatModel(createReqVO.getModelId());
 
         // 2. 插入知识库
         AiKnowledgeDO knowledgeBase = BeanUtils.toBean(createReqVO, AiKnowledgeDO.class)
@@ -60,7 +57,7 @@ public class AiKnowledgeServiceImpl implements AiKnowledgeService {
             throw exception(KNOWLEDGE_NOT_EXISTS);
         }
         // 1.2 校验模型配置
-        AiChatModelDO model = chatModalService.validateChatModel(updateReqVO.getModelId());
+        AiChatModelDO model = chatModelService.validateChatModel(updateReqVO.getModelId());
 
         // 2. 更新知识库
         AiKnowledgeDO updateDO = BeanUtils.toBean(updateReqVO, AiKnowledgeDO.class);
@@ -83,8 +80,8 @@ public class AiKnowledgeServiceImpl implements AiKnowledgeService {
     }
 
     @Override
-    public VectorStore getVectorStoreById(Long knowledgeId) {
-        AiKnowledgeDO knowledge = validateKnowledgeExists(knowledgeId);
+    public VectorStore getVectorStoreById(Long id) {
+        AiKnowledgeDO knowledge = validateKnowledgeExists(id);
         AiChatModelDO model = chatModelService.validateChatModel(knowledge.getModelId());
         // 创建或获取 VectorStore 对象
         return apiKeyService.getOrCreateVectorStore(model.getKeyId());

+ 0 - 1
yudao-module-ai/yudao-spring-boot-starter-ai/src/main/java/cn/iocoder/yudao/framework/ai/core/factory/AiModelFactoryImpl.java

@@ -197,7 +197,6 @@ public class AiModelFactoryImpl implements AiModelFactory {
         });
     }
 
-    // TODO @新:貌似可以创建一个大的 VectorStore。然后搜的时候,通过 Filter.Expression 过滤对应的数据。
     @Override
     public VectorStore getOrCreateVectorStore(EmbeddingModel embeddingModel, AiPlatformEnum platform, String apiKey, String url) {
         String cacheKey = buildClientCacheKey(VectorStore.class, platform, apiKey, url);