123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581 |
- import type { DataSourceNotionPage, DataSourceProvider } from './common'
- import type { AppIconType, AppMode, RetrievalConfig } from '@/types/app'
- import type { Tag } from '@/app/components/base/tag-management/constant'
/**
 * Kinds of data source, expressed as a string enum.
 * Each member's runtime value is the snake_case literal on its right-hand side.
 */
export enum DataSourceType {
  FILE = 'upload_file',
  NOTION = 'notion_import',
  WEB = 'website_crawl',
}
/** The three permission literals a dataset's `permission` field may hold. */
export type DatasetPermission = 'only_me' | 'all_team_members' | 'partial_members'
/**
 * Shape of a dataset record.
 *
 * Notes grounded in the declaration itself:
 * - `retrieval_model_dict` and `retrieval_model` are both typed as `RetrievalConfig`.
 * - `partial_member_list` is the only optional field and its elements are untyped (`any`).
 * - `external_knowledge_info` and `external_retrieval_model` are required nested objects.
 */
export type DataSet = {
  id: string
  name: string
  icon: string
  icon_background: string
  description: string
  permission: DatasetPermission
  data_source_type: DataSourceType
  indexing_technique: 'high_quality' | 'economy'
  created_by: string
  updated_by: string
  updated_at: number
  app_count: number
  document_count: number
  word_count: number
  provider: string
  embedding_model: string
  embedding_model_provider: string
  embedding_available: boolean
  retrieval_model_dict: RetrievalConfig
  retrieval_model: RetrievalConfig
  tags: Tag[]
  // NOTE(review): element shape is not declared here — confirm against callers.
  partial_member_list?: any[]
  external_knowledge_info: {
    external_knowledge_id: string
    external_knowledge_api_id: string
    external_knowledge_api_name: string
    external_knowledge_api_endpoint: string
  }
  external_retrieval_model: {
    top_k: number
    score_threshold: number
    score_threshold_enabled: boolean
  }
}
/**
 * Shape of an external API entry.
 * `settings` carries the endpoint and API key; `dataset_bindings` lists
 * `{ id, name }` pairs. `created_at` is typed as a string here.
 */
export type ExternalAPIItem = {
  id: string
  tenant_id: string
  name: string
  description: string
  settings: {
    endpoint: string
    api_key: string
  }
  dataset_bindings: { id: string; name: string }[]
  created_by: string
  created_at: string
}
/**
 * Shape of an external knowledge entry.
 *
 * `provider` is pinned to the literal `'external'`, and `data_source_type` /
 * `indexing_technique` are typed as `null` only. `description` may be `null`.
 * `created_at` and `updated_at` are strings in this type.
 */
export type ExternalKnowledgeItem = {
  id: string
  name: string
  description: string | null
  provider: 'external'
  permission: DatasetPermission
  data_source_type: null
  indexing_technique: null
  app_count: number
  document_count: number
  word_count: number
  created_by: string
  created_at: string
  updated_by: string
  updated_at: string
  tags: Tag[]
}
/** Result payload for deleting an external API: `result` is `'success'` or `'error'`. */
export type ExternalAPIDeleteResponse = {
  result: 'success' | 'error'
}

/** Usage summary for an external API: an in-use flag plus a count. */
export type ExternalAPIUsage = {
  is_using: boolean
  count: number
}
/**
 * A `File` extended with optional metadata fields.
 * Every added field is optional, so a plain `File` is assignable to `CustomFile`.
 */
export type CustomFile = File & {
  id?: string
  extension?: string
  mime_type?: string
  created_by?: string
  created_at?: number
}
/**
 * Options for a website crawl.
 * `limit` and `max_depth` accept either a number or a string.
 */
export type CrawlOptions = {
  crawl_sub_pages: boolean
  only_main_content: boolean
  includes: string
  excludes: string
  limit: number | string
  max_depth: number | string
  use_sitemap: boolean
}

/** One crawl result: title, markdown body, description and source URL, all strings. */
export type CrawlResultItem = {
  title: string
  markdown: string
  description: string
  source_url: string
}
/** Pairs a `CustomFile` with a `fileID` string and a numeric `progress` value. */
export type FileItem = {
  fileID: string
  file: CustomFile
  // NOTE(review): the range/meaning of `progress` is not declared here — confirm against callers.
  progress: number
}
/** Paginated list of `DataSet` items with `has_more`, `limit`, `page` and `total`. */
export type DataSetListResponse = {
  data: DataSet[]
  has_more: boolean
  limit: number
  page: number
  total: number
}

/** Paginated list of `ExternalAPIItem` items; same envelope fields as `DataSetListResponse`. */
export type ExternalAPIListResponse = {
  data: ExternalAPIItem[]
  has_more: boolean
  limit: number
  page: number
  total: number
}
/** A question/answer pair. */
export type QA = {
  question: string
  answer: string
}

/**
 * Indexing estimate payload.
 * `preview` is a list of strings; `qa_preview` is an optional list of `QA` pairs.
 */
export type IndexingEstimateResponse = {
  tokens: number
  total_price: number
  currency: string
  total_segments: number
  preview: string[]
  qa_preview?: QA[]
}

/** `IndexingEstimateResponse` plus a required `total_nodes` count. */
export type FileIndexingEstimateResponse = {
  total_nodes: number
} & IndexingEstimateResponse
/**
 * Indexing status of a single item.
 *
 * The first four `*_at` fields are numbers, while `completed_at`, `paused_at`,
 * `error` and `stopped_at` are declared as `any`, so the compiler does not
 * check how they are used.
 */
export type IndexingStatusResponse = {
  id: string
  indexing_status: DocumentIndexingStatus
  processing_started_at: number
  parsing_completed_at: number
  cleaning_completed_at: number
  splitting_completed_at: number
  // NOTE(review): the four `any` fields below have no declared shape — confirm actual payload types.
  completed_at: any
  paused_at: any
  error: any
  stopped_at: any
  completed_segments: number
  total_segments: number
}

/** Wrapper holding a list of `IndexingStatusResponse` under `data`. */
export type IndexingStatusBatchResponse = {
  data: IndexingStatusResponse[]
}
/** Processing mode literal: `'automatic'` or `'custom'`. */
export type ProcessMode = 'automatic' | 'custom'

/** Process-rule payload: a mode, the rule set, and the applicable limits. */
export type ProcessRuleResponse = {
  mode: ProcessMode
  rules: Rules
  limits: Limits
}

/** Rule set: a list of pre-processing rules plus one segmentation config. */
export type Rules = {
  pre_processing_rules: PreProcessingRule[]
  segmentation: Segmentation
}

/** Limits object; currently a single numeric field. */
export type Limits = {
  indexing_max_segmentation_tokens_length: number
}

/** A pre-processing rule identified by `id` with an on/off flag. */
export type PreProcessingRule = {
  id: string
  enabled: boolean
}

/** Segmentation settings: separator string, max tokens, and chunk overlap. */
export type Segmentation = {
  separator: string
  max_tokens: number
  chunk_overlap: number
}
/**
 * Indexing status values as a readonly tuple.
 * `as const` keeps each entry a string literal so the union type below can be derived from it.
 */
export const DocumentIndexingStatusList = [
  'waiting',
  'parsing',
  'cleaning',
  'splitting',
  'indexing',
  'paused',
  'error',
  'completed',
] as const

/** Union of the literals in `DocumentIndexingStatusList`. */
export type DocumentIndexingStatus = typeof DocumentIndexingStatusList[number]
/**
 * Display status values as a readonly tuple.
 * `as const` keeps each entry a string literal so the union type below can be derived from it.
 */
export const DisplayStatusList = [
  'queuing',
  'indexing',
  'paused',
  'error',
  'available',
  'enabled',
  'disabled',
  'archived',
] as const

/** Union of the literals in `DisplayStatusList`. */
export type DocumentDisplayStatus = typeof DisplayStatusList[number]
/**
 * Source details attached to a document.
 *
 * `upload_file`, `job_id` and `url` are all required by this declaration, while
 * the `notion_*` fields and `provider` are optional.
 * NOTE(review): presumably only a subset is populated per source type — confirm against callers.
 */
export type DataSourceInfo = {
  upload_file: {
    id: string
    name: string
    size: number
    mime_type: string
    created_at: number
    created_by: string
    extension: string
  }
  notion_page_icon?: string
  notion_workspace_id?: string
  notion_page_id?: string
  provider?: DataSourceProvider
  job_id: string
  url: string
}
/**
 * Base document shape that `SimpleDocumentDetail` extends.
 *
 * `created_from` is `'api'` or `'web'`; `doc_form` is `'text_model'` or
 * `'qa_model'`. The two segment counters are optional.
 */
export type InitialDocumentDetail = {
  id: string
  batch: string
  position: number
  dataset_id: string
  data_source_type: DataSourceType
  data_source_info: DataSourceInfo
  dataset_process_rule_id: string
  name: string
  created_from: 'api' | 'web'
  created_by: string
  created_at: number
  indexing_status: DocumentIndexingStatus
  display_status: DocumentDisplayStatus
  completed_segments?: number
  total_segments?: number
  doc_form: 'text_model' | 'qa_model'
  doc_language: string
}
/**
 * `InitialDocumentDetail` extended with enablement, counters and archive state.
 *
 * `dataset_process_rule_id` is re-declared as optional here, but the base type
 * already declares it as required, so the intersection still requires it.
 */
export type SimpleDocumentDetail = InitialDocumentDetail & {
  enabled: boolean
  word_count: number
  error?: string | null
  archived: boolean
  updated_at: number
  hit_count: number
  dataset_process_rule_id?: string
  data_source_detail_dict?: {
    upload_file: {
      name: string
      extension: string
    }
  }
}
/** Paginated list of `SimpleDocumentDetail` items with `has_more`, `total`, `page` and `limit`. */
export type DocumentListResponse = {
  data: SimpleDocumentDetail[]
  has_more: boolean
  total: number
  page: number
  limit: number
}
/**
 * Common document request fields.
 * `indexing_technique` is a free-form optional string here, unlike the
 * literal union used on `DataSet.indexing_technique`.
 */
export type DocumentReq = {
  original_document_id?: string
  indexing_technique?: string
  doc_form: 'text_model' | 'qa_model'
  doc_language: string
  process_rule: ProcessRule
}

/** `DocumentReq` plus the data source, retrieval config and embedding model fields. */
export type CreateDocumentReq = DocumentReq & {
  data_source: DataSource
  retrieval_model: RetrievalConfig
  embedding_model: string
  embedding_model_provider: string
}

/**
 * `DocumentReq` plus a required `dataset_id`, with the `DataSource` fields
 * (`type`, `info_list`) merged in at the top level as optional properties.
 */
export type IndexingEstimateParams = DocumentReq & Partial<DataSource> & {
  dataset_id: string
}
/**
 * Describes a data source.
 *
 * The source type appears twice: as `type` and as `info_list.data_source_type`.
 * The three `*_info_list` members are all optional.
 */
export type DataSource = {
  type: DataSourceType
  info_list: {
    data_source_type: DataSourceType
    notion_info_list?: NotionInfo[]
    file_info_list?: {
      file_ids: string[]
    }
    website_info_list?: {
      provider: string
      job_id: string
      urls: string[]
    }
  }
}

/** A workspace id together with its list of `DataSourceNotionPage` entries. */
export type NotionInfo = {
  workspace_id: string
  pages: DataSourceNotionPage[]
}
/** Minimal page reference: a `page_id` and a `type`, both strings. */
export type NotionPage = {
  page_id: string
  type: string
}
/**
 * Process rule sent with document requests.
 * `mode` is a plain `string` here, not the `ProcessMode` union used by `ProcessRuleResponse`.
 */
export type ProcessRule = {
  mode: string
  rules: Rules
}
/**
 * Response shape for document creation: a batch id, the created documents,
 * and an optional `dataset`.
 *
 * The lower-camel-case name differs from the PascalCase used by the other
 * exported types; it is kept unchanged so existing imports continue to resolve.
 */
export type createDocumentResponse = {
  dataset?: DataSet
  batch: string
  documents: InitialDocumentDetail[]
}
/**
 * `SimpleDocumentDetail` extended with processing timestamps, pause/disable/
 * archive bookkeeping, document type metadata and a segment count.
 *
 * Things to be aware of:
 * - `batch` and `indexing_status` are re-declared; the base type already
 *   declares `indexing_status` with the narrower `DocumentIndexingStatus`.
 * - The trailing index signature types any undeclared property as `any`,
 *   so misspelled property reads are not caught by the compiler.
 */
export type FullDocumentDetail = SimpleDocumentDetail & {
  batch: string
  created_api_request_id: string
  processing_started_at: number
  parsing_completed_at: number
  cleaning_completed_at: number
  splitting_completed_at: number
  tokens: number
  indexing_latency: number
  completed_at: number
  paused_by: string
  paused_at: number
  stopped_at: number
  indexing_status: string
  disabled_at: number
  disabled_by: string
  archived_reason: 'rule_modified' | 're_upload'
  archived_by: string
  archived_at: number
  doc_type?: DocType | null | 'others'
  doc_metadata?: DocMetadata | null
  segment_count: number
  [key: string]: any
}
/**
 * Document metadata: seven named string fields plus arbitrary extra keys.
 * The index signature requires every property, named or not, to be a `string`.
 */
export type DocMetadata = {
  title: string
  language: string
  author: string
  publisher: string
  publicationDate: string
  ISBN: string
  category: string
  [key: string]: string
}
/** Doc type literals in the "customizable" group, as a readonly tuple. */
export const CUSTOMIZABLE_DOC_TYPES = [
  'book',
  'web_page',
  'paper',
  'social_media_post',
  'personal_document',
  'business_document',
  'im_chat_log',
] as const

/** Doc type literals in the "fixed" group, as a readonly tuple. */
export const FIXED_DOC_TYPES = ['synced_from_github', 'synced_from_notion', 'wikipedia_entry'] as const

/** Union of the literals in `CUSTOMIZABLE_DOC_TYPES`. */
export type CustomizableDocType = typeof CUSTOMIZABLE_DOC_TYPES[number]

/** Union of the literals in `FIXED_DOC_TYPES`. */
export type FixedDocType = typeof FIXED_DOC_TYPES[number]

/** Any doc type from either group. */
export type DocType = CustomizableDocType | FixedDocType
/** Alias of `FullDocumentDetail`. */
export type DocumentDetailResponse = FullDocumentDetail
/**
 * Segment status values.
 *
 * NOTE(review): unlike the other status lists in this file, this array is not
 * declared `as const`, so its type is `string[]` and `SegmentStatus` below
 * resolves to plain `string`. Left as-is because narrowing it could break
 * existing callers; consider `as const` once usages are audited.
 */
export const SEGMENT_STATUS_LIST = ['waiting', 'completed', 'error', 'indexing']

/** Element type of `SEGMENT_STATUS_LIST` (currently `string`). */
export type SegmentStatus = typeof SEGMENT_STATUS_LIST[number]
/**
 * Query parameters for listing segments.
 * Only `limit` is required; a `status` filter is present but commented out.
 */
export type SegmentsQuery = {
  last_id?: string
  limit: number
  // status?: SegmentStatus
  hit_count_gte?: number
  keyword?: string
  enabled?: boolean
}
/**
 * Full segment record.
 * `error` may be `null`, and `answer` is the only optional field.
 */
export type SegmentDetailModel = {
  id: string
  position: number
  document_id: string
  content: string
  word_count: number
  tokens: number
  keywords: string[]
  index_node_id: string
  index_node_hash: string
  hit_count: number
  enabled: boolean
  disabled_at: number
  disabled_by: string
  status: SegmentStatus
  created_by: string
  created_at: number
  indexing_at: number
  completed_at: number
  error: string | null
  stopped_at: number
  answer?: string
}

/** List of `SegmentDetailModel` items with `has_more`, `limit` and `total` (no `page` field). */
export type SegmentsResponse = {
  data: SegmentDetailModel[]
  has_more: boolean
  limit: number
  total: number
}
/**
 * One hit-testing record.
 * `source` is `'app'`, `'hit_testing'` or `'plugin'`; `created_by_role` is
 * `'account'` or `'end_user'`.
 */
export type HitTestingRecord = {
  id: string
  content: string
  source: 'app' | 'hit_testing' | 'plugin'
  source_app_id: string
  created_by_role: 'account' | 'end_user'
  created_by: string
  created_at: number
}
/** One hit-testing result: the matched `Segment`, its numeric score, and a `TsnePosition`. */
export type HitTesting = {
  segment: Segment
  score: number
  tsne_position: TsnePosition
}
/**
 * One hit-testing result from an external knowledge base.
 * `metadata` has exactly two required keys, both prefixed `x-amz-bedrock-kb-`.
 */
export type ExternalKnowledgeBaseHitTesting = {
  content: string
  title: string
  score: number
  metadata: {
    'x-amz-bedrock-kb-source-uri': string
    'x-amz-bedrock-kb-data-source-id': string
  }
}
/** Segment as embedded in a `HitTesting` result, including its parent `Document`. */
export type Segment = {
  id: string
  document: Document
  content: string
  position: number
  word_count: number
  tokens: number
  keywords: string[]
  hit_count: number
  index_node_hash: string
}

/**
 * Minimal document reference used by `Segment`.
 * Within this module the name shadows the global DOM `Document` type.
 * `data_source_type` is a plain `string` here rather than `DataSourceType`.
 */
export type Document = {
  id: string
  data_source_type: string
  name: string
  doc_type: DocType
}
/** Paginated list of `HitTestingRecord` items with `has_more`, `limit`, `total` and `page`. */
export type HitTestingRecordsResponse = {
  data: HitTestingRecord[]
  has_more: boolean
  limit: number
  total: number
  page: number
}
/** A 2-D point with numeric `x` and `y`. */
export type TsnePosition = {
  x: number
  y: number
}
/** Hit-testing response: the query (content plus its `TsnePosition`) and the matching records. */
export type HitTestingResponse = {
  query: {
    content: string
    tsne_position: TsnePosition
  }
  records: Array<HitTesting>
}

/**
 * Hit-testing response for an external knowledge base.
 * Here `query` carries only `content`, with no `tsne_position`.
 */
export type ExternalKnowledgeBaseHitTestingResponse = {
  query: {
    content: string
  }
  records: Array<ExternalKnowledgeBaseHitTesting>
}
/**
 * An app entry in `RelatedAppResponse`.
 * `icon_type` may be `null`; the other icon fields are required strings.
 */
export type RelatedApp = {
  id: string
  name: string
  mode: AppMode
  icon_type: AppIconType | null
  icon: string
  icon_background: string
  icon_url: string
}

/** List of `RelatedApp` items together with a `total` count. */
export type RelatedAppResponse = {
  data: Array<RelatedApp>
  total: number
}
/** Payload for updating a segment: required `content`, optional `answer` and `keywords`. */
export type SegmentUpdater = {
  content: string
  answer?: string
  keywords?: string[]
}
/**
 * Document form as a string enum.
 * The values are the same two literals used by the `doc_form` fields in this file.
 */
export enum DocForm {
  TEXT = 'text_model',
  QA = 'qa_model',
}
/** List of `IndexingStatusResponse` items together with a `total` count. */
export type ErrorDocsResponse = {
  data: IndexingStatusResponse[]
  total: number
}
/**
 * Boolean flags summarizing a selection of datasets.
 * NOTE(review): how these flags are computed is not visible in this file —
 * presumably derived from each dataset's indexing technique and provider; confirm at the call site.
 */
export type SelectedDatasetsMode = {
  allHighQuality: boolean
  allHighQualityVectorSearch: boolean
  allHighQualityFullTextSearch: boolean
  allEconomic: boolean
  mixtureHighQualityAndEconomic: boolean
  allInternal: boolean
  allExternal: boolean
  mixtureInternalAndExternal: boolean
  inconsistentEmbeddingModel: boolean
}
/** Weighted-score presets as a string enum. */
export enum WeightedScoreEnum {
  SemanticFirst = 'semantic_first',
  KeywordFirst = 'keyword_first',
  Customized = 'customized',
}

/** Reranking modes as a string enum. */
export enum RerankingModeEnum {
  RerankingModel = 'reranking_model',
  WeightedScore = 'weighted_score',
}
/**
 * Default semantic/keyword weight pairs.
 *
 * Two keys reuse flag names from `SelectedDatasetsMode`
 * (`allHighQualityVectorSearch`, `allHighQualityFullTextSearch`); `other` is
 * the remaining preset. The vector-search preset is fully semantic, the
 * full-text preset is fully keyword, and `other` splits the weight 0.7 / 0.3.
 */
export const DEFAULT_WEIGHTED_SCORE = {
  allHighQualityVectorSearch: {
    semantic: 1.0,
    keyword: 0,
  },
  allHighQualityFullTextSearch: {
    semantic: 0,
    keyword: 1.0,
  },
  other: {
    semantic: 0.7,
    keyword: 0.3,
  },
}
|