123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232 |
- 'use client'
- import type { FC } from 'react'
- import React, { useCallback, useEffect, useState } from 'react'
- import { useTranslation } from 'react-i18next'
- import UrlInput from '../base/url-input'
- import OptionsWrap from '../base/options-wrap'
- import CrawledResult from '../base/crawled-result'
- import Crawling from '../base/crawling'
- import ErrorMessage from '../base/error-message'
- import Header from './header'
- import Options from './options'
- import cn from '@/utils/classnames'
- import { useModalContext } from '@/context/modal-context'
- import Toast from '@/app/components/base/toast'
- import { checkJinaReaderTaskStatus, createJinaReaderTask } from '@/service/datasets'
- import { sleep } from '@/utils'
- import type { CrawlOptions, CrawlResultItem } from '@/models/datasets'
// i18n key prefix for the generic validation messages used below (`fieldRequired`, `urlError`).
const ERROR_I18N_PREFIX = 'common.errorMsg'
// i18n key prefix for the strings specific to this website-crawl step (`limit`, `unknownError`, `exceptionErrorTitle`).
const I18N_PREFIX = 'datasetCreation.stepOne.website'
/** Props accepted by the `JinaReader` crawler panel. */
type Props = {
  /** Forwarded to `CrawledResult` as its `onPreview` handler. */
  onPreview: (payload: CrawlResultItem) => void
  /** Currently checked crawl results; forwarded to `CrawledResult` as `checkedList`. */
  checkedCrawlResult: CrawlResultItem[]
  /** Called with the new checked list, both on user selection and when results arrive (all selected by default). */
  onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void
  /** Called with the job id when task creation returns one (asynchronous crawl). */
  onJobIdChange: (jobId: string) => void
  /** Current crawl options; `limit` is validated before a run and used to cap the reported total. */
  crawlOptions: CrawlOptions
  /** Forwarded to `Options` as its `onChange` handler. */
  onCrawlOptionsChange: (payload: CrawlOptions) => void
}
/** Lifecycle of a crawl run as tracked by the component. */
enum Step {
  /** Nothing has been run yet; the result area is hidden. */
  init = 'init',
  /** A crawl has been started and has not settled yet. */
  running = 'running',
  /** The last crawl settled, either with results or with an error. */
  finished = 'finished',
}
/**
 * Website crawler panel backed by the Jina Reader service calls
 * (`createJinaReaderTask` / `checkJinaReaderTaskStatus`).
 *
 * Flow: validate the URL and the crawl limit, create a task, then either use
 * the inline result (`res.data`) or poll the returned job (`res.job_id`)
 * until it reports `completed` or `failed`. Results are selected by default
 * through `onCheckedCrawlResultChange`.
 */
const JinaReader: FC<Props> = ({
  onPreview,
  checkedCrawlResult,
  onCheckedCrawlResultChange,
  onJobIdChange,
  crawlOptions,
  onCrawlOptionsChange,
}) => {
  const { t } = useTranslation()
  const [step, setStep] = useState<Step>(Step.init)

  // Set to a fresh timestamp every time the step changes to a non-init value and
  // handed to OptionsWrap as `controlFoldOptions` (presumably a "fold now" signal —
  // confirm against OptionsWrap).
  const [controlFoldOptions, setControlFoldOptions] = useState<number>(0)
  useEffect(() => {
    if (step !== Step.init)
      setControlFoldOptions(Date.now())
  }, [step])

  const { setShowAccountSettingModal } = useModalContext()
  const handleSetting = useCallback(() => {
    setShowAccountSettingModal({
      payload: 'data-source',
    })
  }, [setShowAccountSettingModal])

  /**
   * Validates the URL and the crawl limit.
   * Only the first failing rule produces a message.
   */
  const checkValid = useCallback((url: string) => {
    let errorMsg = ''
    if (!url) {
      errorMsg = t(`${ERROR_I18N_PREFIX}.fieldRequired`, {
        field: 'url',
      })
    }

    if (!errorMsg && !((url.startsWith('http://') || url.startsWith('https://'))))
      errorMsg = t(`${ERROR_I18N_PREFIX}.urlError`)

    if (!errorMsg && (crawlOptions.limit === null || crawlOptions.limit === undefined || crawlOptions.limit === '')) {
      errorMsg = t(`${ERROR_I18N_PREFIX}.fieldRequired`, {
        field: t(`${I18N_PREFIX}.limit`),
      })
    }

    return {
      isValid: !errorMsg,
      errorMsg,
    }
  }, [crawlOptions, t])

  const isInit = step === Step.init
  const isCrawlFinished = step === Step.finished
  const isRunning = step === Step.running

  const [crawlResult, setCrawlResult] = useState<{
    current: number
    total: number
    data: CrawlResultItem[]
    time_consuming: number | string
  } | undefined>(undefined)
  const [crawlErrorMessage, setCrawlErrorMessage] = useState('')
  const showError = isCrawlFinished && crawlErrorMessage

  /**
   * Polls the task status every 2.5s until it is `completed` or `failed`
   * (a response without a status is treated as failed).
   * While the job is still running, the progress and the default selection
   * are pushed to state on every tick.
   *
   * Never rejects: failures are reported as `{ isError: true, errorMessage }`.
   */
  const waitForCrawlFinished = useCallback(async (jobId: string) => {
    // Cap the reported total at the configured limit; if the limit is not a
    // number, fall back to the reported total instead of propagating NaN.
    const limit = parseFloat(crawlOptions.limit as string)
    const clampTotal = (total: number) => Number.isNaN(limit) ? total : Math.min(total, limit)

    try {
      // The service response type is not visible from this file.
      const res = await checkJinaReaderTaskStatus(jobId) as any
      if (res.status === 'completed') {
        return {
          isError: false,
          data: {
            ...res,
            total: clampTotal(res.total),
          },
        }
      }
      if (res.status === 'failed' || !res.status) {
        return {
          isError: true,
          errorMessage: res.message,
          data: {
            data: [],
          },
        }
      }
      // update the progress
      setCrawlResult({
        ...res,
        total: clampTotal(res.total),
      })
      onCheckedCrawlResultChange(res.data || []) // default select the crawl result
      await sleep(2500)
      return await waitForCrawlFinished(jobId)
    }
    catch (e: any) {
      // The rejection is expected to expose `json()` (Response-like). Anything
      // else — e.g. a plain Error — must not throw again from inside this handler.
      let errorMessage: string | undefined
      try {
        errorMessage = typeof e?.json === 'function'
          ? (await e.json())?.message
          : e?.message
      }
      catch {
        errorMessage = e?.message
      }
      return {
        isError: true,
        errorMessage,
        data: {
          data: [],
        },
      }
    }
  }, [crawlOptions.limit, onCheckedCrawlResultChange])

  /**
   * Runs a crawl for `url`: validates, creates the task, then stores either
   * the inline result or the polled job result. Always ends in `Step.finished`.
   */
  const handleRun = useCallback(async (url: string) => {
    const { isValid, errorMsg } = checkValid(url)
    if (!isValid) {
      Toast.notify({
        message: errorMsg!,
        type: 'error',
      })
      return
    }
    setStep(Step.running)
    try {
      const startTime = Date.now()
      const res = await createJinaReaderTask({
        url,
        options: crawlOptions,
      }) as any

      if (res.data) {
        // Inline result: a single page was returned directly, so time it locally.
        const data = {
          current: 1,
          total: 1,
          data: [{
            title: res.data.title,
            markdown: res.data.content,
            description: res.data.description,
            source_url: res.data.url,
          }],
          time_consuming: (Date.now() - startTime) / 1000,
        }
        setCrawlResult(data)
        onCheckedCrawlResultChange(data.data || [])
        setCrawlErrorMessage('')
      }
      else if (res.job_id) {
        const jobId = res.job_id
        onJobIdChange(jobId)
        const { isError, data, errorMessage } = await waitForCrawlFinished(jobId)
        if (isError) {
          setCrawlErrorMessage(errorMessage || t(`${I18N_PREFIX}.unknownError`))
        }
        else {
          setCrawlResult(data)
          onCheckedCrawlResultChange(data.data || []) // default select the crawl result
          setCrawlErrorMessage('')
        }
      }
      else {
        // Neither an inline result nor a job id came back: report it rather
        // than finishing with a stale or empty result list.
        setCrawlErrorMessage(t(`${I18N_PREFIX}.unknownError`)!)
      }
    }
    catch (e) {
      setCrawlErrorMessage(t(`${I18N_PREFIX}.unknownError`)!)
      console.error(e)
    }
    finally {
      setStep(Step.finished)
    }
  }, [checkValid, crawlOptions, onCheckedCrawlResultChange, onJobIdChange, t, waitForCrawlFinished])

  return (
    <div>
      <Header onSetting={handleSetting} />
      <div className={cn('mt-2 p-4 pb-0 rounded-xl border border-gray-200')}>
        <UrlInput onRun={handleRun} isRunning={isRunning} />
        <OptionsWrap
          className={cn('mt-4')}
          controlFoldOptions={controlFoldOptions}
        >
          <Options className='mt-2' payload={crawlOptions} onChange={onCrawlOptionsChange} />
        </OptionsWrap>

        {!isInit && (
          <div className='mt-3 relative left-[-16px] w-[calc(100%_+_32px)] rounded-b-xl'>
            {isRunning
              && <Crawling
                className='mt-2'
                crawledNum={crawlResult?.current || 0}
                totalNum={crawlResult?.total || parseFloat(crawlOptions.limit as string) || 0}
              />}
            {showError && (
              <ErrorMessage className='rounded-b-xl' title={t(`${I18N_PREFIX}.exceptionErrorTitle`)} errorMsg={crawlErrorMessage} />
            )}
            {isCrawlFinished && !showError
              && <CrawledResult
                className='mb-2'
                list={crawlResult?.data || []}
                checkedList={checkedCrawlResult}
                onSelectedChange={onCheckedCrawlResultChange}
                onPreview={onPreview}
                usedTime={parseFloat(crawlResult?.time_consuming as string) || 0}
              />
            }
          </div>
        )}
      </div>
    </div>
  )
}
export default React.memo(JinaReader)
|