Browse Source

!20 【新增】AI 知识库: 段落召回、段落启禁用、配置自定义
Merge pull request !20 from 小新/master-jdk21-ai

芋道源码 10 months ago
parent
commit
0b1d9ce251
19 changed files with 277 additions and 124 deletions
  1. 7 0
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/vo/knowledge/AiKnowledgeCreateMyReqVO.java
  2. 19 0
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/vo/knowledge/AiKnowledgeDocumentCreateReqVO.java
  3. 17 0
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/vo/segment/AiKnowledgeSegmentSearchReqVO.java
  4. 12 0
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeDO.java
  5. 21 1
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeDocumentDO.java
  6. 6 3
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeSegmentDO.java
  7. 8 0
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/mysql/knowledge/AiKnowledgeSegmentMapper.java
  8. 5 16
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeDocumentServiceImpl.java
  9. 12 0
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeSegmentService.java
  10. 96 4
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeSegmentServiceImpl.java
  11. 9 0
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeService.java
  12. 13 0
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeServiceImpl.java
  13. 1 4
      yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/model/AiApiKeyServiceImpl.java
  14. 7 5
      yudao-module-ai/yudao-spring-boot-starter-ai/pom.xml
  15. 5 11
      yudao-module-ai/yudao-spring-boot-starter-ai/src/main/java/cn/iocoder/yudao/framework/ai/config/YudaoAiAutoConfiguration.java
  16. 14 0
      yudao-module-ai/yudao-spring-boot-starter-ai/src/main/java/cn/iocoder/yudao/framework/ai/core/factory/AiModelFactory.java
  17. 25 0
      yudao-module-ai/yudao-spring-boot-starter-ai/src/main/java/cn/iocoder/yudao/framework/ai/core/factory/AiModelFactoryImpl.java
  18. 0 28
      yudao-module-ai/yudao-spring-boot-starter-ai/src/main/java/cn/iocoder/yudao/framework/ai/core/factory/AiVectorStoreFactory.java
  19. 0 52
      yudao-module-ai/yudao-spring-boot-starter-ai/src/main/java/cn/iocoder/yudao/framework/ai/core/factory/AiVectorStoreFactoryImpl.java

+ 7 - 0
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/vo/knowledge/AiKnowledgeCreateMyReqVO.java

@@ -25,4 +25,11 @@ public class AiKnowledgeCreateMyReqVO {
     @NotNull(message = "嵌入模型不能为空")
     private Long modelId;
 
+    @Schema(description = "相似性阈值", requiredMode = Schema.RequiredMode.REQUIRED, example = "0.5")
+    @NotNull(message = "相似性阈值不能为空")
+    private Double similarityThreshold;
+
+    @Schema(description = "topK", requiredMode = Schema.RequiredMode.REQUIRED, example = "3")
+    @NotNull(message = "topK 不能为空")
+    private Integer topK;
 }

+ 19 - 0
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/vo/knowledge/AiKnowledgeDocumentCreateReqVO.java

@@ -23,4 +23,23 @@ public class AiKnowledgeDocumentCreateReqVO {
     @URL(message = "文档 URL 格式不正确")
     private String url;
 
+    @Schema(description = "每个文本块的目标 token 数", requiredMode = Schema.RequiredMode.REQUIRED, example = "800")
+    @NotNull(message = "每个文本块的目标 token 数不能为空")
+    private Integer defaultChunkSize;
+
+    @Schema(description = "每个文本块的最小字符数", requiredMode = Schema.RequiredMode.REQUIRED, example = "350")
+    @NotNull(message = "每个文本块的最小字符数不能为空")
+    private Integer minChunkSizeChars;
+
+    @Schema(description = "丢弃阈值", requiredMode = Schema.RequiredMode.REQUIRED, example = "5")
+    @NotNull(message = "丢弃阈值不能为空")
+    private Integer minChunkLengthToEmbed;
+
+    @Schema(description = "最大块数", requiredMode = Schema.RequiredMode.REQUIRED, example = "10000")
+    @NotNull(message = "最大块数不能为空")
+    private Integer maxNumChunks;
+
+    @Schema(description = "分块是否保留分隔符", requiredMode = Schema.RequiredMode.REQUIRED, example = "true")
+    @NotNull(message = "分块是否保留分隔符不能为空")
+    private Boolean keepSeparator;
 }

+ 17 - 0
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/controller/admin/knowledge/vo/segment/AiKnowledgeSegmentSearchReqVO.java

@@ -0,0 +1,17 @@
+package cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.segment;
+
+import io.swagger.v3.oas.annotations.media.Schema;
+import jakarta.validation.constraints.NotEmpty;
+import jakarta.validation.constraints.NotNull;
+import lombok.Data;
+
+
+/**
+ * Admin request VO for knowledge segment recall (similarity search).
+ * <p>
+ * Both fields are documented as required in the OpenAPI schema, so they also carry
+ * Bean Validation constraints, matching the other knowledge request VOs.
+ */
+@Schema(description = "管理后台 - AI 知识库段落召回 Request VO")
+@Data
+public class AiKnowledgeSegmentSearchReqVO {
+
+    /**
+     * Id of the knowledge base to search in.
+     */
+    @Schema(description = "知识库编号", requiredMode = Schema.RequiredMode.REQUIRED, example = "24790")
+    @NotNull(message = "知识库编号不能为空")
+    private Long knowledgeId;
+
+    /**
+     * Query text that is embedded and matched against the stored segments.
+     */
+    @Schema(description = "内容", requiredMode = Schema.RequiredMode.REQUIRED, example = "Java 学习路线")
+    @NotEmpty(message = "内容不能为空")
+    private String content;
+
+}

+ 12 - 0
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeDO.java

@@ -52,6 +52,18 @@ public class AiKnowledgeDO extends BaseDO {
      * 模型标识
      */
     private String model;
+
+    /**
+     * topK
+     */
+    private Integer topK;
+
+    /**
+     * 相似度阈值
+     */
+    private Double similarityThreshold;
+
+
     /**
      * 状态
      * <p>

+ 21 - 1
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeDocumentDO.java

@@ -23,7 +23,7 @@ public class AiKnowledgeDocumentDO extends BaseDO {
     private Long id;
     /**
      * 知识库编号
-     *
+     * <p>
      * 关联 {@link AiKnowledgeDO#getId()}
      */
     private Long knowledgeId;
@@ -47,6 +47,26 @@ public class AiKnowledgeDocumentDO extends BaseDO {
      * 字符数
      */
     private Integer wordCount;
+    /**
+     * 每个文本块的目标 token 数
+     */
+    private Integer defaultChunkSize;
+    /**
+     * 每个文本块的最小字符数
+     */
+    private Integer minChunkSizeChars;
+    /**
+     * 低于此值的块会被丢弃
+     */
+    private Integer minChunkLengthToEmbed;
+    /**
+     * 最大块数
+     */
+    private Integer maxNumChunks;
+    /**
+     * 分块是否保留分隔符
+     */
+    private Boolean keepSeparator;
     /**
      * 切片状态
      * <p>

+ 6 - 3
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/dataobject/knowledge/AiKnowledgeSegmentDO.java

@@ -2,6 +2,8 @@ package cn.iocoder.yudao.module.ai.dal.dataobject.knowledge;
 
 import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
 import cn.iocoder.yudao.framework.mybatis.core.dataobject.BaseDO;
+import com.baomidou.mybatisplus.annotation.FieldStrategy;
+import com.baomidou.mybatisplus.annotation.TableField;
 import com.baomidou.mybatisplus.annotation.TableId;
 import com.baomidou.mybatisplus.annotation.TableName;
 import lombok.Data;
@@ -25,16 +27,17 @@ public class AiKnowledgeSegmentDO extends BaseDO {
     /**
      * 向量库的编号
      */
+    @TableField(updateStrategy = FieldStrategy.ALWAYS)
     private String vectorId;
     /**
      * 知识库编号
-     *
+     * <p>
      * 关联 {@link AiKnowledgeDO#getId()}
      */
     private Long knowledgeId;
     /**
      * 文档编号
-     *
+     * <p>
      * 关联 {@link AiKnowledgeDocumentDO#getId()}
      */
     private Long documentId;
@@ -52,7 +55,7 @@ public class AiKnowledgeSegmentDO extends BaseDO {
     private Integer tokens;
     /**
      * 状态
-     *
+     * <p>
      * 枚举 {@link CommonStatusEnum}
      */
     private Integer status;

+ 8 - 0
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/dal/mysql/knowledge/AiKnowledgeSegmentMapper.java

@@ -7,6 +7,8 @@ import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.segment.AiKnowle
 import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeSegmentDO;
 import org.apache.ibatis.annotations.Mapper;
 
+import java.util.List;
+
 /**
  * AI 知识库-分片 Mapper
  *
@@ -22,4 +24,10 @@ public interface AiKnowledgeSegmentMapper extends BaseMapperX<AiKnowledgeSegment
                 .likeIfPresent(AiKnowledgeSegmentDO::getContent, reqVO.getKeyword())
                 .orderByDesc(AiKnowledgeSegmentDO::getId));
     }
+
+    /**
+     * Selects the segments whose vector id is contained in the given list, newest id first.
+     *
+     * @param vectorIdList vector store ids to look up; may be null or empty
+     * @return the matching segments, or an empty list when no ids were given
+     */
+    default List<AiKnowledgeSegmentDO> selectList(List<String> vectorIdList) {
+        // An IN clause without values is not valid SQL, so answer directly instead of querying.
+        if (vectorIdList == null || vectorIdList.isEmpty()) {
+            return List.of();
+        }
+        return selectList(new LambdaQueryWrapperX<AiKnowledgeSegmentDO>()
+                .in(AiKnowledgeSegmentDO::getVectorId, vectorIdList)
+                .orderByDesc(AiKnowledgeSegmentDO::getId));
+    }
 }

+ 5 - 16
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeDocumentServiceImpl.java

@@ -9,15 +9,11 @@ import cn.iocoder.yudao.framework.common.util.object.BeanUtils;
 import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentPageReqVO;
 import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.document.AiKnowledgeDocumentUpdateReqVO;
 import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.knowledge.AiKnowledgeDocumentCreateReqVO;
-import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDO;
 import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDocumentDO;
 import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeSegmentDO;
-import cn.iocoder.yudao.module.ai.dal.dataobject.model.AiChatModelDO;
 import cn.iocoder.yudao.module.ai.dal.mysql.knowledge.AiKnowledgeDocumentMapper;
 import cn.iocoder.yudao.module.ai.dal.mysql.knowledge.AiKnowledgeSegmentMapper;
 import cn.iocoder.yudao.module.ai.enums.knowledge.AiKnowledgeDocumentStatusEnum;
-import cn.iocoder.yudao.module.ai.service.model.AiApiKeyService;
-import cn.iocoder.yudao.module.ai.service.model.AiChatModelService;
 import jakarta.annotation.Resource;
 import lombok.extern.slf4j.Slf4j;
 import org.springframework.ai.document.Document;
@@ -48,24 +44,16 @@ public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentServic
     @Resource
     private AiKnowledgeSegmentMapper segmentMapper;
 
-    @Resource
-    private TokenTextSplitter tokenTextSplitter;
     @Resource
     private TokenCountEstimator tokenCountEstimator;
-
-    @Resource
-    private AiApiKeyService apiKeyService;
     @Resource
     private AiKnowledgeService knowledgeService;
-    @Resource
-    private AiChatModelService chatModelService;
 
     @Override
     @Transactional(rollbackFor = Exception.class)
     public Long createKnowledgeDocument(AiKnowledgeDocumentCreateReqVO createReqVO) {
-        // 0. 校验
-        AiKnowledgeDO knowledge = knowledgeService.validateKnowledgeExists(createReqVO.getKnowledgeId());
-        AiChatModelDO model = chatModelService.validateChatModel(knowledge.getModelId());
+        // 0. 校验并获取向量存储实例
+        VectorStore vectorStore = knowledgeService.getVectorStoreById(createReqVO.getKnowledgeId());
 
         // 1.1 下载文档
         TikaDocumentReader loader = new TikaDocumentReader(downloadFile(createReqVO.getUrl()));
@@ -82,6 +70,9 @@ public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentServic
             return documentId;
         }
 
+        // 2 构造文本分段器
+        TokenTextSplitter tokenTextSplitter = new TokenTextSplitter(createReqVO.getDefaultChunkSize(), createReqVO.getMinChunkSizeChars(), createReqVO.getMinChunkLengthToEmbed(),
+                createReqVO.getMaxNumChunks(), createReqVO.getKeepSeparator());
         // 2.1 文档分段
         List<Document> segments = tokenTextSplitter.apply(documents);
         // 2.2 分段内容入库
@@ -92,8 +83,6 @@ public class AiKnowledgeDocumentServiceImpl implements AiKnowledgeDocumentServic
                         .setStatus(CommonStatusEnum.ENABLE.getStatus()));
         segmentMapper.insertBatch(segmentDOList);
 
-        // 3.1 获取向量存储实例
-        VectorStore vectorStore = apiKeyService.getOrCreateVectorStore(model.getKeyId());
         // 3.2 向量化并存储
         segments.forEach(segment -> segment.getMetadata().put(AiKnowledgeSegmentDO.FIELD_KNOWLEDGE_ID, createReqVO.getKnowledgeId()));
         vectorStore.add(segments);

+ 12 - 0
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeSegmentService.java

@@ -2,10 +2,13 @@ package cn.iocoder.yudao.module.ai.service.knowledge;
 
 import cn.iocoder.yudao.framework.common.pojo.PageResult;
 import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.segment.AiKnowledgeSegmentPageReqVO;
+import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.segment.AiKnowledgeSegmentSearchReqVO;
 import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.segment.AiKnowledgeSegmentUpdateReqVO;
 import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.segment.AiKnowledgeSegmentUpdateStatusReqVO;
 import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeSegmentDO;
 
+import java.util.List;
+
 /**
  * AI 知识库段落 Service 接口
  *
@@ -35,4 +38,13 @@ public interface AiKnowledgeSegmentService {
      */
     void updateKnowledgeSegmentStatus(AiKnowledgeSegmentUpdateStatusReqVO reqVO);
 
+
+    /**
+     * Recalls knowledge segments for the given search request.
+     *
+     * @param reqVO recall request (knowledge base id and query content)
+     * @return the recalled segments
+     */
+    List<AiKnowledgeSegmentDO> similaritySearch(AiKnowledgeSegmentSearchReqVO reqVO);
+
 }

+ 96 - 4
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeSegmentServiceImpl.java

@@ -1,16 +1,34 @@
 package cn.iocoder.yudao.module.ai.service.knowledge;
 
+import cn.hutool.core.collection.CollUtil;
+import cn.hutool.core.collection.ListUtil;
+import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
 import cn.iocoder.yudao.framework.common.pojo.PageResult;
 import cn.iocoder.yudao.framework.common.util.object.BeanUtils;
 import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.segment.AiKnowledgeSegmentPageReqVO;
+import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.segment.AiKnowledgeSegmentSearchReqVO;
 import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.segment.AiKnowledgeSegmentUpdateReqVO;
 import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.segment.AiKnowledgeSegmentUpdateStatusReqVO;
+import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDO;
 import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeSegmentDO;
+import cn.iocoder.yudao.module.ai.dal.dataobject.model.AiChatModelDO;
 import cn.iocoder.yudao.module.ai.dal.mysql.knowledge.AiKnowledgeSegmentMapper;
+import cn.iocoder.yudao.module.ai.service.model.AiApiKeyService;
+import cn.iocoder.yudao.module.ai.service.model.AiChatModelService;
 import jakarta.annotation.Resource;
 import lombok.extern.slf4j.Slf4j;
+import org.springframework.ai.document.Document;
+import org.springframework.ai.vectorstore.SearchRequest;
+import org.springframework.ai.vectorstore.VectorStore;
+import org.springframework.ai.vectorstore.filter.FilterExpressionBuilder;
 import org.springframework.stereotype.Service;
 
+import java.util.List;
+import java.util.Objects;
+
+import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception;
+import static cn.iocoder.yudao.module.ai.enums.ErrorCodeConstants.KNOWLEDGE_SEGMENT_NOT_EXISTS;
+
 /**
  * AI 知识库分片 Service 实现类
  *
@@ -23,6 +41,13 @@ public class AiKnowledgeSegmentServiceImpl implements AiKnowledgeSegmentService
     @Resource
     private AiKnowledgeSegmentMapper segmentMapper;
 
+    @Resource
+    private AiKnowledgeService knowledgeService;
+    @Resource
+    private AiChatModelService chatModelService;
+    @Resource
+    private AiApiKeyService apiKeyService;
+
     @Override
     public PageResult<AiKnowledgeSegmentDO> getKnowledgeSegmentPage(AiKnowledgeSegmentPageReqVO pageReqVO) {
         return segmentMapper.selectPage(pageReqVO);
@@ -30,13 +55,80 @@ public class AiKnowledgeSegmentServiceImpl implements AiKnowledgeSegmentService
 
     @Override
     public void updateKnowledgeSegment(AiKnowledgeSegmentUpdateReqVO reqVO) {
-        segmentMapper.updateById(BeanUtils.toBean(reqVO, AiKnowledgeSegmentDO.class));
-        // TODO @xin 重新向量化
+        // 0 校验
+        AiKnowledgeSegmentDO oldKnowledgeSegment = validateKnowledgeSegmentExists(reqVO.getId());
+        // 2.1 获取知识库向量实例
+        VectorStore vectorStore = knowledgeService.getVectorStoreById(oldKnowledgeSegment.getKnowledgeId());
+        // 2.2 删除原向量
+        vectorStore.delete(List.of(oldKnowledgeSegment.getVectorId()));
+
+        // 2.3 重新向量化
+        Document document = new Document(reqVO.getContent());
+        document.getMetadata().put(AiKnowledgeSegmentDO.FIELD_KNOWLEDGE_ID, oldKnowledgeSegment.getKnowledgeId());
+        vectorStore.add(List.of(document));
+
+        // 2.1 更新段落内容
+        AiKnowledgeSegmentDO knowledgeSegment = BeanUtils.toBean(reqVO, AiKnowledgeSegmentDO.class);
+        knowledgeSegment.setVectorId(document.getId());
+        segmentMapper.updateById(knowledgeSegment);
     }
 
     @Override
     public void updateKnowledgeSegmentStatus(AiKnowledgeSegmentUpdateStatusReqVO reqVO) {
-        segmentMapper.updateById(BeanUtils.toBean(reqVO, AiKnowledgeSegmentDO.class));
-        // TODO @xin 1.禁用删除向量 2.启用重新向量化
+        // 0. Validate that the segment exists
+        AiKnowledgeSegmentDO oldKnowledgeSegment = validateKnowledgeSegmentExists(reqVO.getId());
+        // Status unchanged: nothing to do. Re-enabling an enabled segment would add a second vector
+        // and orphan the first one; re-disabling a disabled one would try to delete a null vector id.
+        if (Objects.equals(oldKnowledgeSegment.getStatus(), reqVO.getStatus())) {
+            return;
+        }
+        // 1. Get the vector store of the owning knowledge base
+        VectorStore vectorStore = knowledgeService.getVectorStoreById(oldKnowledgeSegment.getKnowledgeId());
+        AiKnowledgeSegmentDO knowledgeSegment = BeanUtils.toBean(reqVO, AiKnowledgeSegmentDO.class);
+
+        if (Objects.equals(reqVO.getStatus(), CommonStatusEnum.ENABLE.getStatus())) {
+            // 2.1 Enabling: embed the stored content again and remember the new vector id
+            Document document = new Document(oldKnowledgeSegment.getContent());
+            document.getMetadata().put(AiKnowledgeSegmentDO.FIELD_KNOWLEDGE_ID, oldKnowledgeSegment.getKnowledgeId());
+            vectorStore.add(List.of(document));
+            knowledgeSegment.setVectorId(document.getId());
+        } else {
+            // 2.2 Disabling: remove the vector (if any) and clear the reference to it
+            if (oldKnowledgeSegment.getVectorId() != null) {
+                vectorStore.delete(List.of(oldKnowledgeSegment.getVectorId()));
+            }
+            knowledgeSegment.setVectorId(null);
+        }
+        // 3. Persist the new status and vector id
+        segmentMapper.updateById(knowledgeSegment);
+    }
+
+    @Override
+    public List<AiKnowledgeSegmentDO> similaritySearch(AiKnowledgeSegmentSearchReqVO reqVO) {
+        // 0. Validate the knowledge base and its model
+        AiKnowledgeDO knowledge = knowledgeService.validateKnowledgeExists(reqVO.getKnowledgeId());
+        AiChatModelDO model = chatModelService.validateChatModel(knowledge.getModelId());
+
+        // 1.1 Get the vector store instance
+        VectorStore vectorStore = apiKeyService.getOrCreateVectorStore(model.getKeyId());
+
+        // 1.2 Build the search request, restricted to this knowledge base. topK / similarityThreshold
+        //     may be null on knowledge bases stored before these settings existed; in that case the
+        //     SearchRequest defaults are kept instead of failing on unboxing.
+        SearchRequest searchRequest = SearchRequest.query(reqVO.getContent())
+                .withFilterExpression(new FilterExpressionBuilder().eq(AiKnowledgeSegmentDO.FIELD_KNOWLEDGE_ID, reqVO.getKnowledgeId()).build());
+        if (knowledge.getTopK() != null) {
+            searchRequest = searchRequest.withTopK(knowledge.getTopK());
+        }
+        if (knowledge.getSimilarityThreshold() != null) {
+            searchRequest = searchRequest.withSimilarityThreshold(knowledge.getSimilarityThreshold());
+        }
+        // 1.3 Vector search
+        List<Document> documentList = vectorStore.similaritySearch(searchRequest);
+        if (CollUtil.isEmpty(documentList)) {
+            return ListUtil.empty();
+        }
+        // 2.1 Load the segments behind the hits
+        List<String> vectorIdList = CollUtil.getFieldValues(documentList, "id", String.class);
+        List<AiKnowledgeSegmentDO> segmentList = segmentMapper.selectList(vectorIdList);
+        // 2.2 The mapper orders by segment id; put the segments back into the order of the search hits
+        //     so the ranking produced by the vector store is what the caller sees.
+        if (segmentList.size() > 1) {
+            segmentList.sort((left, right) -> Integer.compare(
+                    vectorIdList.indexOf(left.getVectorId()), vectorIdList.indexOf(right.getVectorId())));
+        }
+        return segmentList;
+    }
+
+
+    /**
+     * Loads a knowledge segment by id and fails when it does not exist.
+     *
+     * @param id segment id
+     * @return the stored segment, never {@code null}
+     */
+    private AiKnowledgeSegmentDO validateKnowledgeSegmentExists(Long id) {
+        AiKnowledgeSegmentDO segment = segmentMapper.selectById(id);
+        if (segment != null) {
+            return segment;
+        }
+        // Nothing stored under this id: report it as a business error
+        throw exception(KNOWLEDGE_SEGMENT_NOT_EXISTS);
     }
 }

+ 9 - 0
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeService.java

@@ -5,6 +5,7 @@ import cn.iocoder.yudao.framework.common.pojo.PageResult;
 import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.knowledge.AiKnowledgeCreateMyReqVO;
 import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.knowledge.AiKnowledgeUpdateMyReqVO;
 import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDO;
+import org.springframework.ai.vectorstore.VectorStore;
 
 /**
  * AI 知识库-基础信息 Service 接口
@@ -47,4 +48,12 @@ public interface AiKnowledgeService {
      * @return 知识库分页
      */
     PageResult<AiKnowledgeDO> getKnowledgePageMy(Long userId, PageParam pageReqVO);
+
+    /**
+     * Gets the vector store instance for the given knowledge base id.
+     *
+     * @param knowledgeId knowledge base id
+     * @return the vector store instance
+     */
+    VectorStore getVectorStoreById(Long knowledgeId);
 }

+ 13 - 0
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/knowledge/AiKnowledgeServiceImpl.java

@@ -10,9 +10,11 @@ import cn.iocoder.yudao.module.ai.controller.admin.knowledge.vo.knowledge.AiKnow
 import cn.iocoder.yudao.module.ai.dal.dataobject.knowledge.AiKnowledgeDO;
 import cn.iocoder.yudao.module.ai.dal.dataobject.model.AiChatModelDO;
 import cn.iocoder.yudao.module.ai.dal.mysql.knowledge.AiKnowledgeMapper;
+import cn.iocoder.yudao.module.ai.service.model.AiApiKeyService;
 import cn.iocoder.yudao.module.ai.service.model.AiChatModelService;
 import jakarta.annotation.Resource;
 import lombok.extern.slf4j.Slf4j;
+import org.springframework.ai.vectorstore.VectorStore;
 import org.springframework.stereotype.Service;
 
 import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception;
@@ -32,6 +34,10 @@ public class AiKnowledgeServiceImpl implements AiKnowledgeService {
 
     @Resource
     private AiKnowledgeMapper knowledgeMapper;
+    @Resource
+    private AiChatModelService chatModelService;
+    @Resource
+    private AiApiKeyService apiKeyService;
 
     @Override
     public Long createKnowledgeMy(AiKnowledgeCreateMyReqVO createReqVO, Long userId) {
@@ -75,4 +81,11 @@ public class AiKnowledgeServiceImpl implements AiKnowledgeService {
         return knowledgeMapper.selectPageByMy(userId, pageReqVO);
     }
 
+    @Override
+    public VectorStore getVectorStoreById(Long knowledgeId) {
+        // Resolve step by step: knowledge base -> its model -> the API key that owns the vector store.
+        // Both validate* calls are the existence checks; they run in the same order as before.
+        var modelId = validateKnowledgeExists(knowledgeId).getModelId();
+        var keyId = chatModelService.validateChatModel(modelId).getKeyId();
+        return apiKeyService.getOrCreateVectorStore(keyId);
+    }
+
 }

+ 1 - 4
yudao-module-ai/yudao-module-ai-biz/src/main/java/cn/iocoder/yudao/module/ai/service/model/AiApiKeyServiceImpl.java

@@ -2,7 +2,6 @@ package cn.iocoder.yudao.module.ai.service.model;
 
 import cn.iocoder.yudao.framework.ai.core.enums.AiPlatformEnum;
 import cn.iocoder.yudao.framework.ai.core.factory.AiModelFactory;
-import cn.iocoder.yudao.framework.ai.core.factory.AiVectorStoreFactory;
 import cn.iocoder.yudao.framework.ai.core.model.midjourney.api.MidjourneyApi;
 import cn.iocoder.yudao.framework.ai.core.model.suno.api.SunoApi;
 import cn.iocoder.yudao.framework.common.enums.CommonStatusEnum;
@@ -39,8 +38,6 @@ public class AiApiKeyServiceImpl implements AiApiKeyService {
 
     @Resource
     private AiModelFactory modelFactory;
-    @Resource
-    private AiVectorStoreFactory vectorFactory;
 
     @Override
     public Long createApiKey(AiApiKeySaveReqVO createReqVO) {
@@ -149,7 +146,7 @@ public class AiApiKeyServiceImpl implements AiApiKeyService {
     public VectorStore getOrCreateVectorStore(Long id) {
         AiApiKeyDO apiKey = validateApiKey(id);
         AiPlatformEnum platform = AiPlatformEnum.validatePlatform(apiKey.getPlatform());
-        return vectorFactory.getOrCreateVectorStore(getEmbeddingModel(id), platform, apiKey.getApiKey(), apiKey.getUrl());
+        return modelFactory.getOrCreateVectorStore(getEmbeddingModel(id), platform, apiKey.getApiKey(), apiKey.getUrl());
     }
 
 }

+ 7 - 5
yudao-module-ai/yudao-spring-boot-starter-ai/pom.xml

@@ -46,11 +46,13 @@
         </dependency>
 
         <!-- 向量化,基于 Redis 存储,Tika 解析内容 -->
-        <dependency>
-            <groupId>${spring-ai.groupId}</groupId>
-            <artifactId>spring-ai-transformers-spring-boot-starter</artifactId>
-            <version>${spring-ai.version}</version>
-        </dependency>
+
+        <!-- 暂不做经济型,先注释 -->
+        <!--        <dependency>-->
+        <!--            <groupId>${spring-ai.groupId}</groupId>-->
+        <!--            <artifactId>spring-ai-transformers-spring-boot-starter</artifactId>-->
+        <!--            <version>${spring-ai.version}</version>-->
+        <!--        </dependency>-->
         <dependency>
             <groupId>${spring-ai.groupId}</groupId>
             <artifactId>spring-ai-tika-document-reader</artifactId>

+ 5 - 11
yudao-module-ai/yudao-spring-boot-starter-ai/src/main/java/cn/iocoder/yudao/framework/ai/config/YudaoAiAutoConfiguration.java

@@ -2,8 +2,6 @@ package cn.iocoder.yudao.framework.ai.config;
 
 import cn.iocoder.yudao.framework.ai.core.factory.AiModelFactory;
 import cn.iocoder.yudao.framework.ai.core.factory.AiModelFactoryImpl;
-import cn.iocoder.yudao.framework.ai.core.factory.AiVectorStoreFactory;
-import cn.iocoder.yudao.framework.ai.core.factory.AiVectorStoreFactoryImpl;
 import cn.iocoder.yudao.framework.ai.core.model.deepseek.DeepSeekChatModel;
 import cn.iocoder.yudao.framework.ai.core.model.deepseek.DeepSeekChatOptions;
 import cn.iocoder.yudao.framework.ai.core.model.midjourney.api.MidjourneyApi;
@@ -38,11 +36,6 @@ public class YudaoAiAutoConfiguration {
         return new AiModelFactoryImpl();
     }
 
-    @Bean
-    public AiVectorStoreFactory aiVectorFactory() {
-        return new AiVectorStoreFactoryImpl();
-    }
-
 
     // ========== 各种 AI Client 创建 ==========
 
@@ -89,7 +82,7 @@ public class YudaoAiAutoConfiguration {
     // TODO @xin 免费版本
 //    @Bean
 //    @Lazy // TODO 芋艿:临时注释,避免无法启动」
-//    public EmbeddingModel transformersEmbeddingClient() {
+//    public TransformersEmbeddingModel transformersEmbeddingClient() {
 //        return new TransformersEmbeddingModel(MetadataMode.EMBED);
 //    }
 
@@ -98,23 +91,24 @@ public class YudaoAiAutoConfiguration {
      */
 //    @Bean
 //    @Lazy // TODO 芋艿:临时注释,避免无法启动
-//    public RedisVectorStore vectorStore(TongYiTextEmbeddingModel tongYiTextEmbeddingModel, RedisVectorStoreProperties properties,
+//    public RedisVectorStore vectorStore(TransformersEmbeddingModel embeddingModel, RedisVectorStoreProperties properties,
 //                                        RedisProperties redisProperties) {
 //        var config = RedisVectorStore.RedisVectorStoreConfig.builder()
 //                .withIndexName(properties.getIndex())
 //                .withPrefix(properties.getPrefix())
+//                .withMetadataFields(new RedisVectorStore.MetadataField("knowledgeId", Schema.FieldType.NUMERIC))
 //                .build();
 //
-//        RedisVectorStore redisVectorStore = new RedisVectorStore(config, tongYiTextEmbeddingModel,
+//        RedisVectorStore redisVectorStore = new RedisVectorStore(config, embeddingModel,
 //                new JedisPooled(redisProperties.getHost(), redisProperties.getPort()),
 //                properties.isInitializeSchema());
 //        redisVectorStore.afterPropertiesSet();
 //        return redisVectorStore;
 //    }
-
     @Bean
     @Lazy // TODO 芋艿: temporarily lazy, to avoid startup failure
     public TokenTextSplitter tokenTextSplitter() {
+        // TODO @xin: extract these splitter settings into configuration
         return new TokenTextSplitter(500, 100, 5, 10000, true);
     }
 

+ 14 - 0
yudao-module-ai/yudao-spring-boot-starter-ai/src/main/java/cn/iocoder/yudao/framework/ai/core/factory/AiModelFactory.java

@@ -6,6 +6,7 @@ import cn.iocoder.yudao.framework.ai.core.model.suno.api.SunoApi;
 import org.springframework.ai.chat.model.ChatModel;
 import org.springframework.ai.embedding.EmbeddingModel;
 import org.springframework.ai.image.ImageModel;
+import org.springframework.ai.vectorstore.VectorStore;
 
 /**
  * AI Model 模型工厂的接口类
@@ -92,4 +93,17 @@ public interface AiModelFactory {
      */
     EmbeddingModel getOrCreateEmbeddingModel(AiPlatformEnum platform, String apiKey, String url);
 
+    /**
+     * Gets the VectorStore object for the given configuration.
+     * <p>
+     * If it does not exist yet, it is created.
+     *
+     * @param embeddingModel embedding model
+     * @param platform       platform
+     * @param apiKey         API key
+     * @param url            API URL
+     * @return the VectorStore object
+     */
+    VectorStore getOrCreateVectorStore(EmbeddingModel embeddingModel, AiPlatformEnum platform, String apiKey, String url);
+
 }

+ 25 - 0
yudao-module-ai/yudao-spring-boot-starter-ai/src/main/java/cn/iocoder/yudao/framework/ai/core/factory/AiModelFactoryImpl.java

@@ -13,6 +13,7 @@ import cn.iocoder.yudao.framework.ai.core.model.deepseek.DeepSeekChatModel;
 import cn.iocoder.yudao.framework.ai.core.model.midjourney.api.MidjourneyApi;
 import cn.iocoder.yudao.framework.ai.core.model.suno.api.SunoApi;
 import cn.iocoder.yudao.framework.ai.core.model.xinghuo.XingHuoChatModel;
+import cn.iocoder.yudao.framework.common.util.spring.SpringUtils;
 import com.alibaba.cloud.ai.tongyi.TongYiAutoConfiguration;
 import com.alibaba.cloud.ai.tongyi.TongYiConnectionProperties;
 import com.alibaba.cloud.ai.tongyi.chat.TongYiChatModel;
@@ -54,13 +55,18 @@ import org.springframework.ai.qianfan.api.QianFanApi;
 import org.springframework.ai.qianfan.api.QianFanImageApi;
 import org.springframework.ai.stabilityai.StabilityAiImageModel;
 import org.springframework.ai.stabilityai.api.StabilityAiApi;
+import org.springframework.ai.vectorstore.RedisVectorStore;
+import org.springframework.ai.vectorstore.VectorStore;
 import org.springframework.ai.zhipuai.ZhiPuAiChatModel;
 import org.springframework.ai.zhipuai.ZhiPuAiImageModel;
 import org.springframework.ai.zhipuai.api.ZhiPuAiApi;
 import org.springframework.ai.zhipuai.api.ZhiPuAiImageApi;
+import org.springframework.boot.autoconfigure.data.redis.RedisProperties;
 import org.springframework.retry.support.RetryTemplate;
 import org.springframework.web.client.ResponseErrorHandler;
 import org.springframework.web.client.RestClient;
+import redis.clients.jedis.JedisPooled;
+import redis.clients.jedis.search.Schema;
 
 import java.util.List;
 
@@ -191,6 +197,25 @@ public class AiModelFactoryImpl implements AiModelFactory {
         });
     }
 
+    /**
+     * Returns the Redis-backed {@link VectorStore} cached for the given platform / API key / URL,
+     * creating and initializing it on first use.
+     * <p>
+     * The store registers {@code knowledgeId} as a numeric metadata field so that searches can be
+     * filtered by knowledge base.
+     * <p>
+     * NOTE(review): the embedding model is not part of the cache key, so the model passed on the first
+     * call for a given platform / API key / URL is the one the cached store keeps using.
+     */
+    @Override
+    public VectorStore getOrCreateVectorStore(EmbeddingModel embeddingModel, AiPlatformEnum platform, String apiKey, String url) {
+        String cacheKey = buildClientCacheKey(VectorStore.class, platform, apiKey, url);
+        return Singleton.get(cacheKey, (Func0<VectorStore>) () -> {
+            // NOTE(review): the raw API key becomes part of the Redis index name and key prefix, so it is
+            // readable by anyone who can list Redis keys - consider a digest or the key's database id.
+            String prefix = StrUtil.format("{}#{}:", platform.getPlatform(), apiKey);
+            var config = RedisVectorStore.RedisVectorStoreConfig.builder()
+                    .withIndexName(cacheKey)
+                    .withPrefix(prefix)
+                    .withMetadataFields(new RedisVectorStore.MetadataField("knowledgeId", Schema.FieldType.NUMERIC))
+                    .build();
+            RedisProperties redisProperties = SpringUtils.getBean(RedisProperties.class);
+            // Hand over the configured credentials as well; with host and port only, the connection
+            // cannot authenticate against a password-protected Redis.
+            JedisPooled jedisPooled = new JedisPooled(redisProperties.getHost(), redisProperties.getPort(),
+                    redisProperties.getUsername(), redisProperties.getPassword());
+            // The trailing 'true' asks the store to initialize its index schema
+            RedisVectorStore redisVectorStore = new RedisVectorStore(config, embeddingModel, jedisPooled, true);
+            redisVectorStore.afterPropertiesSet();
+            return redisVectorStore;
+        });
+    }
+
     private static String buildClientCacheKey(Class<?> clazz, Object... params) {
         if (ArrayUtil.isEmpty(params)) {
             return clazz.getName();

+ 0 - 28
yudao-module-ai/yudao-spring-boot-starter-ai/src/main/java/cn/iocoder/yudao/framework/ai/core/factory/AiVectorStoreFactory.java

@@ -1,28 +0,0 @@
-package cn.iocoder.yudao.framework.ai.core.factory;
-
-import cn.iocoder.yudao.framework.ai.core.enums.AiPlatformEnum;
-import org.springframework.ai.embedding.EmbeddingModel;
-import org.springframework.ai.vectorstore.VectorStore;
-
-// TODO @xin:也放到 AiModelFactory 里面好了,后续改成 AiFactory
-/**
- * AI Vector 模型工厂的接口类
- *
- * @author xiaoxin
- */
-public interface AiVectorStoreFactory {
-
-    /**
-     * 基于指定配置,获得 VectorStore 对象
-     * <p>
-     * 如果不存在,则进行创建
-     *
-     * @param embeddingModel 嵌入模型
-     * @param platform       平台
-     * @param apiKey         API KEY
-     * @param url            API URL
-     * @return VectorStore 对象
-     */
-    VectorStore getOrCreateVectorStore(EmbeddingModel embeddingModel, AiPlatformEnum platform, String apiKey, String url);
-
-}

+ 0 - 52
yudao-module-ai/yudao-spring-boot-starter-ai/src/main/java/cn/iocoder/yudao/framework/ai/core/factory/AiVectorStoreFactoryImpl.java

@@ -1,52 +0,0 @@
-package cn.iocoder.yudao.framework.ai.core.factory;
-
-import cn.hutool.core.lang.Singleton;
-import cn.hutool.core.lang.func.Func0;
-import cn.hutool.core.util.ArrayUtil;
-import cn.hutool.core.util.StrUtil;
-import cn.iocoder.yudao.framework.ai.core.enums.AiPlatformEnum;
-import cn.iocoder.yudao.framework.common.util.spring.SpringUtils;
-import org.springframework.ai.embedding.EmbeddingModel;
-import org.springframework.ai.vectorstore.RedisVectorStore;
-import org.springframework.ai.vectorstore.VectorStore;
-import org.springframework.boot.autoconfigure.data.redis.RedisProperties;
-import redis.clients.jedis.JedisPooled;
-
-/**
- * AI Vector 模型工厂的实现类
- * 使用 redisVectorStore 实现 VectorStore
- *
- * @author xiaoxin
- */
-public class AiVectorStoreFactoryImpl implements AiVectorStoreFactory {
-
-    @Override
-    public VectorStore getOrCreateVectorStore(EmbeddingModel embeddingModel, AiPlatformEnum platform, String apiKey, String url) {
-        String cacheKey = buildClientCacheKey(VectorStore.class, platform, apiKey, url);
-        return Singleton.get(cacheKey, (Func0<VectorStore>) () -> {
-            // TODO 芋艿 @xin 这两个配置取哪好呢
-            // TODO 不同模型的向量维度可能会不一样,目前看貌似是以 index 来做区分的,维度不一样存不到一个 index 上
-            // TODO 回复:好的哈
-            String index = "default-index";
-            String prefix = "default:";
-            var config = RedisVectorStore.RedisVectorStoreConfig.builder()
-                    .withIndexName(index)
-                    .withPrefix(prefix)
-                    .build();
-            RedisProperties redisProperties = SpringUtils.getBean(RedisProperties.class);
-            RedisVectorStore redisVectorStore = new RedisVectorStore(config, embeddingModel,
-                    new JedisPooled(redisProperties.getHost(), redisProperties.getPort()),
-                    true);
-            redisVectorStore.afterPropertiesSet();
-            return redisVectorStore;
-        });
-    }
-
-    private static String buildClientCacheKey(Class<?> clazz, Object... params) {
-        if (ArrayUtil.isEmpty(params)) {
-            return clazz.getName();
-        }
-        return StrUtil.format("{}#{}", clazz.getName(), ArrayUtil.join(params, "_"));
-    }
-
-}