index.tsx 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. 'use client'
  2. import type { FC } from 'react'
  3. import React, { useCallback, useEffect, useState } from 'react'
  4. import { useTranslation } from 'react-i18next'
  5. import UrlInput from '../base/url-input'
  6. import OptionsWrap from '../base/options-wrap'
  7. import CrawledResult from '../base/crawled-result'
  8. import Crawling from '../base/crawling'
  9. import ErrorMessage from '../base/error-message'
  10. import Header from './header'
  11. import Options from './options'
  12. import cn from '@/utils/classnames'
  13. import { useModalContext } from '@/context/modal-context'
  14. import type { CrawlOptions, CrawlResultItem } from '@/models/datasets'
  15. import Toast from '@/app/components/base/toast'
  16. import { checkFirecrawlTaskStatus, createFirecrawlTask } from '@/service/datasets'
  17. import { sleep } from '@/utils'
  /** i18n key prefix for the shared validation/error messages used by `checkValid`. */
  const ERROR_I18N_PREFIX = 'common.errorMsg'

  /** i18n key prefix for the website-crawl strings used by this component. */
  const I18N_PREFIX = 'datasetCreation.stepOne.website'
  /** Props accepted by the FireCrawl component. */
  type Props = {
    // --- data ---
    /** Currently selected crawl results; forwarded to the result list as its checked list. */
    checkedCrawlResult: CrawlResultItem[]
    /** Crawl options read for validation, sent to the server, and shown in the options form. */
    crawlOptions: CrawlOptions

    // --- callbacks ---
    /** Forwarded to the result list's preview handler. */
    onPreview: (payload: CrawlResultItem) => void
    /** Receives the new selection, both from the result list and while crawl results arrive. */
    onCheckedCrawlResultChange: (payload: CrawlResultItem[]) => void
    /** Receives the job id returned when a crawl task is created. */
    onJobIdChange: (jobId: string) => void
    /** Forwarded to the options form's change handler. */
    onCrawlOptionsChange: (payload: CrawlOptions) => void
  }
  /** Lifecycle of a crawl as tracked by the component's `step` state. */
  enum Step {
    /** Initial state, before any run has been started. */
    init = 'init',
    /** Set when a run starts, after the URL and options pass validation. */
    running = 'running',
    /** Set when a run ends, whether it succeeded or failed. */
    finished = 'finished',
  }
  /** Result of polling a crawl job until it reaches a terminal state. */
  type CrawlOutcome = {
    isError: boolean
    errorMessage?: string
    // The status payload is untyped here (the service response is cast to `any`).
    data: any
  }

  /**
   * Caps the total reported by the status endpoint at the configured limit.
   * When the limit does not parse to a number the reported total is returned
   * unchanged, so the progress view never receives NaN because of the limit.
   */
  const capTotalByLimit = (reportedTotal: number, limit: unknown): number => {
    const parsedLimit = parseFloat(String(limit))
    return Number.isNaN(parsedLimit) ? reportedTotal : Math.min(reportedTotal, parsedLimit)
  }

  /**
   * Reads `message` from the JSON body of a rejected, response-like value.
   * Resolves to `undefined` when the value has no `json()` method, when reading
   * the body fails, or when the body has no string `message`.
   */
  const readErrorMessage = async (error: unknown): Promise<string | undefined> => {
    const readJson = (error as { json?: unknown } | null | undefined)?.json
    if (typeof readJson !== 'function')
      return undefined
    try {
      const body = await readJson.call(error)
      return typeof body?.message === 'string' ? body.message : undefined
    }
    catch {
      return undefined
    }
  }

  /**
   * Website crawl panel: validates the URL and options, creates a crawl task,
   * polls its status until it completes or fails, and renders the progress,
   * an error message, or the selectable crawl results.
   */
  const FireCrawl: FC<Props> = ({
    onPreview,
    checkedCrawlResult,
    onCheckedCrawlResultChange,
    onJobIdChange,
    crawlOptions,
    onCrawlOptionsChange,
  }) => {
    const { t } = useTranslation()
    const [step, setStep] = useState<Step>(Step.init)
    const isInit = step === Step.init
    const isRunning = step === Step.running
    const isCrawlFinished = step === Step.finished

    // A fresh timestamp is handed to OptionsWrap every time the step changes to
    // a non-init value (presumably a signal to fold the options; see OptionsWrap).
    const [controlFoldOptions, setControlFoldOptions] = useState<number>(0)
    useEffect(() => {
      if (step !== Step.init)
        setControlFoldOptions(Date.now())
    }, [step])

    const { setShowAccountSettingModal } = useModalContext()
    const handleSetting = useCallback(() => {
      setShowAccountSettingModal({
        payload: 'data-source',
      })
    }, [setShowAccountSettingModal])

    /**
     * Validates the URL and the crawl limit.
     * Returns the first failing rule's translated message, or an empty message when valid.
     */
    const checkValid = useCallback((url: string) => {
      let errorMsg = ''
      if (!url) {
        errorMsg = t(`${ERROR_I18N_PREFIX}.fieldRequired`, {
          field: 'url',
        })
      }

      if (!errorMsg && !((url.startsWith('http://') || url.startsWith('https://'))))
        errorMsg = t(`${ERROR_I18N_PREFIX}.urlError`)

      if (!errorMsg && (crawlOptions.limit === null || crawlOptions.limit === undefined || crawlOptions.limit === '')) {
        errorMsg = t(`${ERROR_I18N_PREFIX}.fieldRequired`, {
          field: t(`${I18N_PREFIX}.limit`),
        })
      }

      return {
        isValid: !errorMsg,
        errorMsg,
      }
    }, [crawlOptions, t])

    const [crawlResult, setCrawlResult] = useState<{
      current: number
      total: number
      data: CrawlResultItem[]
      time_consuming: number | string
    } | undefined>(undefined)
    const [crawlErrorMessage, setCrawlErrorMessage] = useState('')
    const showError = isCrawlFinished && crawlErrorMessage !== ''

    /**
     * Polls the job status until it is `completed` or failed.
     * While the job is still in progress, the latest partial result is stored and
     * reported as the checked selection, then the next poll follows `sleep(2500)`.
     * Never rejects: every failure is returned as `{ isError: true, ... }`.
     */
    const waitForCrawlFinished = useCallback(async (jobId: string): Promise<CrawlOutcome> => {
      try {
        for (;;) {
          const res = await checkFirecrawlTaskStatus(jobId) as any
          if (res.status === 'completed') {
            return {
              isError: false,
              data: {
                ...res,
                total: capTotalByLimit(res.total, crawlOptions.limit),
              },
            }
          }
          if (res.status === 'error' || !res.status) {
            // can't get the error message from the firecrawl api
            return {
              isError: true,
              errorMessage: res.message,
              data: {
                data: [],
              },
            }
          }
          // update the progress
          setCrawlResult({
            ...res,
            total: capTotalByLimit(res.total, crawlOptions.limit),
          })
          onCheckedCrawlResultChange(res.data || []) // default select the crawl result
          await sleep(2500)
        }
      }
      catch (e: unknown) {
        const errorMessage = await readErrorMessage(e)
        // Nothing readable in the rejection (not response-like, or no JSON message):
        // log the real cause instead of masking it with a secondary error.
        if (errorMessage === undefined)
          console.log(e)
        return {
          isError: true,
          errorMessage,
          data: {
            data: [],
          },
        }
      }
    }, [crawlOptions.limit, onCheckedCrawlResultChange])

    /**
     * Runs a crawl for `url`: validates, creates the task, reports the job id,
     * waits for completion, then stores either the result or an error message.
     * The step always ends as `finished`.
     */
    const handleRun = useCallback(async (url: string) => {
      const { isValid, errorMsg } = checkValid(url)
      if (!isValid) {
        Toast.notify({
          message: errorMsg!,
          type: 'error',
        })
        return
      }

      // Drop the previous run's result so the progress view does not start from stale numbers.
      setCrawlResult(undefined)
      setStep(Step.running)
      try {
        const passToServerCrawlOptions: any = {
          ...crawlOptions,
        }
        // An empty max_depth is omitted from the options sent to the server.
        if (crawlOptions.max_depth === '')
          delete passToServerCrawlOptions.max_depth

        const res = await createFirecrawlTask({
          url,
          options: passToServerCrawlOptions,
        }) as any
        const jobId = res.job_id
        onJobIdChange(jobId)

        const { isError, data, errorMessage } = await waitForCrawlFinished(jobId)
        if (isError) {
          setCrawlErrorMessage(errorMessage || t(`${I18N_PREFIX}.unknownError`))
        }
        else {
          setCrawlResult(data)
          onCheckedCrawlResultChange(data.data || []) // default select the crawl result
          setCrawlErrorMessage('')
        }
      }
      catch (e) {
        setCrawlErrorMessage(t(`${I18N_PREFIX}.unknownError`)!)
        console.log(e)
      }
      finally {
        setStep(Step.finished)
      }
    }, [checkValid, crawlOptions, onCheckedCrawlResultChange, onJobIdChange, t, waitForCrawlFinished])

    return (
      <div>
        <Header onSetting={handleSetting} />
        <div className={cn('mt-2 p-4 pb-0 rounded-xl border border-gray-200')}>
          <UrlInput onRun={handleRun} isRunning={isRunning} />
          <OptionsWrap
            className={cn('mt-4')}
            controlFoldOptions={controlFoldOptions}
          >
            <Options className='mt-2' payload={crawlOptions} onChange={onCrawlOptionsChange} />
          </OptionsWrap>

          {!isInit && (
            <div className='mt-3 relative left-[-16px] w-[calc(100%_+_32px)] rounded-b-xl'>
              {isRunning
                && <Crawling
                  className='mt-2'
                  crawledNum={crawlResult?.current || 0}
                  totalNum={crawlResult?.total || parseFloat(crawlOptions.limit as string) || 0}
                />}
              {showError && (
                <ErrorMessage className='rounded-b-xl' title={t(`${I18N_PREFIX}.exceptionErrorTitle`)} errorMsg={crawlErrorMessage} />
              )}
              {isCrawlFinished && !showError
                && <CrawledResult
                  className='mb-2'
                  list={crawlResult?.data || []}
                  checkedList={checkedCrawlResult}
                  onSelectedChange={onCheckedCrawlResultChange}
                  onPreview={onPreview}
                  usedTime={parseFloat(crawlResult?.time_consuming as string) || 0}
                />
              }
            </div>
          )}
        </div>
      </div>
    )
  }
  export default React.memo(FireCrawl)