Skip to content

Commit

Permalink
wip: can't deserialize firecrawl response in crawl_operator from craw…
Browse files Browse the repository at this point in the history
…l-worker
  • Loading branch information
skeptrunedev committed Dec 13, 2024
1 parent 5a8f9a7 commit f8451b3
Show file tree
Hide file tree
Showing 10 changed files with 679 additions and 37 deletions.
16 changes: 15 additions & 1 deletion .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
"name": "Debug executable 'trieve-server'",
"cargo": {
"args": [
"+default",
"+nightly",
"build",
"--manifest-path=./server/Cargo.toml",
"--bin=trieve-server",
Expand Down Expand Up @@ -35,6 +35,20 @@
"args": [],
"cwd": "${workspaceFolder}/server"
},
{
"type": "lldb",
"request": "launch",
"name": "Debug executable 'crawl-worker'",
"cargo": {
"args": [
"build",
"--manifest-path=./server/Cargo.toml",
"--bin=crawl-worker"
]
},
"args": [],
"cwd": "${workspaceFolder}/server"
},
{
"type": "lldb",
"request": "launch",
Expand Down
258 changes: 258 additions & 0 deletions clients/ts-sdk/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -4387,6 +4387,42 @@
]
}
},
"/api/file/html_page": {
"post": {
"tags": [
"File"
],
"summary": "Upload HTML Page",
"description": "Chunk HTML by headings and queue for indexing into the specified dataset.",
"operationId": "upload_html_page",
"requestBody": {
"description": "JSON request payload to upload an HTML page",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/UploadHtmlPageReqPayload"
}
}
},
"required": true
},
"responses": {
"204": {
"description": "Confirmation that the HTML page is being processed"
},
"400": {
"description": "Service error relating to processing the HTML page",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ErrorResponseBody"
}
}
}
}
}
}
},
"/api/file/{file_id}": {
"get": {
"tags": [
Expand Down Expand Up @@ -8460,6 +8496,15 @@
"type": "string",
"description": "The URL to crawl",
"nullable": true
},
"webhook_metadata": {
"description": "Metadata to send back with the webhook call for each successful page scrape",
"nullable": true
},
"webhook_url": {
"type": "string",
"description": "URL to call back via webhook for each successful page scrape",
"nullable": true
}
},
"example": {
Expand Down Expand Up @@ -9642,6 +9687,44 @@
"dot"
]
},
"Document": {
"type": "object",
"required": [
"metadata"
],
"properties": {
"extract": {
"type": "string",
"nullable": true
},
"html": {
"type": "string",
"nullable": true
},
"links": {
"type": "array",
"items": {
"type": "string"
},
"nullable": true
},
"markdown": {
"type": "string",
"nullable": true
},
"metadata": {
"$ref": "#/components/schemas/Metadata"
},
"rawHtml": {
"type": "string",
"nullable": true
},
"screenshot": {
"type": "string",
"nullable": true
}
}
},
"EditMessageReqPayload": {
"type": "object",
"required": [
Expand Down Expand Up @@ -11680,6 +11763,152 @@
"updated_at": "2021-01-01 00:00:00.000"
}
},
"Metadata": {
"type": "object",
"properties": {
"articleSection": {
"type": "string",
"nullable": true
},
"articleTag": {
"type": "string",
"nullable": true
},
"dcDate": {
"type": "string",
"nullable": true
},
"dcDateCreated": {
"type": "string",
"nullable": true
},
"dcDescription": {
"type": "string",
"nullable": true
},
"dcSubject": {
"type": "string",
"nullable": true
},
"dcTermsAudience": {
"type": "string",
"nullable": true
},
"dcTermsCreated": {
"type": "string",
"nullable": true
},
"dcTermsKeywords": {
"type": "string",
"nullable": true
},
"dcTermsSubject": {
"type": "string",
"nullable": true
},
"dcTermsType": {
"type": "string",
"nullable": true
},
"dcType": {
"type": "string",
"nullable": true
},
"description": {
"type": "string",
"nullable": true
},
"error": {
"type": "string",
"nullable": true
},
"keywords": {
"type": "string",
"nullable": true
},
"language": {
"type": "string",
"nullable": true
},
"modifiedTime": {
"type": "string",
"nullable": true
},
"ogAudio": {
"type": "string",
"nullable": true
},
"ogDescription": {
"type": "string",
"nullable": true
},
"ogDeterminer": {
"type": "string",
"nullable": true
},
"ogImage": {
"type": "string",
"nullable": true
},
"ogLocale": {
"type": "string",
"nullable": true
},
"ogLocaleAlternate": {
"type": "array",
"items": {
"type": "string"
},
"nullable": true
},
"ogSiteName": {
"type": "string",
"nullable": true
},
"ogTitle": {
"type": "string",
"nullable": true
},
"ogUrl": {
"type": "string",
"nullable": true
},
"ogVideo": {
"type": "string",
"nullable": true
},
"publishedTime": {
"type": "string",
"nullable": true
},
"robots": {
"type": "string",
"nullable": true
},
"site_map": {
"allOf": [
{
"$ref": "#/components/schemas/Sitemap"
}
],
"nullable": true
},
"sourceURL": {
"type": "string",
"nullable": true
},
"statusCode": {
"type": "integer",
"format": "int32",
"nullable": true,
"minimum": 0
},
"title": {
"type": "string",
"nullable": true
}
}
},
"MmrOptions": {
"type": "object",
"description": "MMR Options lets you specify different methods to rerank the chunks in the result set using Maximal Marginal Relevance. If not specified, this defaults to the score of the chunks.",
Expand Down Expand Up @@ -15025,6 +15254,17 @@
"pos_in_queue": 1
}
},
"Sitemap": {
"type": "object",
"required": [
"changefreq"
],
"properties": {
"changefreq": {
"type": "string"
}
}
},
"SlimChunkMetadata": {
"type": "object",
"required": [
Expand Down Expand Up @@ -16378,6 +16618,24 @@
}
}
},
"UploadHtmlPageReqPayload": {
"type": "object",
"required": [
"data",
"metadata",
"scrapeId"
],
"properties": {
"data": {
"$ref": "#/components/schemas/Document"
},
"metadata": {},
"scrapeId": {
"type": "string",
"format": "uuid"
}
}
},
"UsageGraphPoint": {
"type": "object",
"required": [
Expand Down
Loading

0 comments on commit f8451b3

Please sign in to comment.