Skip to content

Commit

Permalink
feature: add video worker
Browse files Browse the repository at this point in the history
  • Loading branch information
densumesh authored and cdxker committed Dec 19, 2024
1 parent 81a4e7e commit 88875a8
Show file tree
Hide file tree
Showing 10 changed files with 881 additions and 117 deletions.
29 changes: 28 additions & 1 deletion clients/ts-sdk/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -8575,6 +8575,11 @@
}
}
},
"CrawlYoutubeOptions": {
"type": "object",
"title": "CrawlYoutubeOptions",
"description": "Options for Crawling Youtube"
},
"CreateApiKeyReqPayload": {
"type": "object",
"required": [
Expand Down Expand Up @@ -10047,7 +10052,8 @@
"crawl_started",
"csv_jsonl_processing_failed",
"csv_jsonl_processing_checkpoint",
"csv_jsonl_processing_completed"
"csv_jsonl_processing_completed",
"video_uploaded"
]
},
"EventTypes": {
Expand Down Expand Up @@ -14034,6 +14040,27 @@
}
}
]
},
{
"allOf": [
{
"$ref": "#/components/schemas/CrawlYoutubeOptions"
},
{
"type": "object",
"required": [
"type"
],
"properties": {
"type": {
"type": "string",
"enum": [
"youtube"
]
}
}
}
]
}
],
"description": "Options for including an openapi spec or shopify settigns",
Expand Down
11 changes: 10 additions & 1 deletion clients/ts-sdk/src/types.gen.ts
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,13 @@ export type CrawlShopifyOptions = {
tag_regexes?: Array<(string)> | null;
};

/**
 * Options for crawling Youtube.
 *
 * No named fields are declared; any string-keyed property of unknown type is accepted.
 */
export type CrawlYoutubeOptions = Record<string, unknown>;

export type CreateApiKeyReqPayload = {
/**
* The dataset ids which the api key will have access to. If not provided or empty, the api key will have access to all datasets in the dataset.
Expand Down Expand Up @@ -1238,7 +1245,7 @@ export type EventReturn = {
page_count: number;
};

export type EventTypeRequest = 'file_uploaded' | 'file_upload_failed' | 'chunks_uploaded' | 'chunk_action_failed' | 'chunk_updated' | 'bulk_chunks_deleted' | 'dataset_delete_failed' | 'qdrant_upload_failed' | 'bulk_chunk_upload_failed' | 'group_chunks_updated' | 'group_chunks_action_failed' | 'crawl_completed' | 'crawl_failed' | 'crawl_started' | 'csv_jsonl_processing_failed' | 'csv_jsonl_processing_checkpoint' | 'csv_jsonl_processing_completed';
export type EventTypeRequest = 'file_uploaded' | 'file_upload_failed' | 'chunks_uploaded' | 'chunk_action_failed' | 'chunk_updated' | 'bulk_chunks_deleted' | 'dataset_delete_failed' | 'qdrant_upload_failed' | 'bulk_chunk_upload_failed' | 'group_chunks_updated' | 'group_chunks_action_failed' | 'crawl_completed' | 'crawl_failed' | 'crawl_started' | 'csv_jsonl_processing_failed' | 'csv_jsonl_processing_checkpoint' | 'csv_jsonl_processing_completed' | 'video_uploaded';

export type EventTypes = {
/**
Expand Down Expand Up @@ -2505,6 +2512,8 @@ export type ScrapeOptions = (CrawlOpenAPIOptions & {
type: 'openapi';
}) | (CrawlShopifyOptions & {
type: 'shopify';
}) | (CrawlYoutubeOptions & {
type: 'youtube';
});

export type type5 = 'openapi';
Expand Down
58 changes: 56 additions & 2 deletions frontends/dashboard/src/pages/dataset/CrawlingSettings.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ export const defaultCrawlOptions: CrawlOptions = {
};

export type FlatCrawlOptions = Omit<CrawlOptions, "scrape_options"> & {
type?: "openapi" | "shopify";
type?: "openapi" | "shopify" | "youtube";
openapi_schema_url?: string;
openapi_tag?: string;
group_variants?: boolean | null;
Expand Down Expand Up @@ -83,6 +83,21 @@ export const unflattenCrawlOptions = (
tag_regexes: options.tag_regexes ?? [],
},
};
} else if (options && options.type == "youtube") {
return {
allow_external_links: options.allow_external_links,
boost_titles: options.boost_titles,
exclude_paths: options.exclude_paths,
exclude_tags: options.exclude_tags,
include_paths: options.include_paths,
include_tags: options.include_tags,
interval: options.interval,
limit: options.limit,
site_url: options.site_url,
scrape_options: {
type: "youtube",
},
};
}
return {
allow_external_links: options.allow_external_links,
Expand Down Expand Up @@ -115,6 +130,11 @@ export const flattenCrawlOptions = (
group_variants: options.scrape_options.group_variants,
tag_regexes: options.scrape_options.tag_regexes,
};
} else if (options.scrape_options?.type == "youtube") {
return {
...options,
type: "youtube",
};
} else {
return {
...options,
Expand Down Expand Up @@ -177,6 +197,7 @@ export const CrawlingSettings = () => {
}));

/**
 * Saves the edited crawl options by passing them to
 * `updateDatasetMutation.mutate`.
 *
 * @param options - The crawl options to submit.
 */
const onSave = (options: CrawlOptions) => {
  // The debug `console.log` of the payload was removed so saved options are
  // not printed to the browser console.
  updateDatasetMutation.mutate(options);
};

Expand Down Expand Up @@ -249,6 +270,7 @@ const RealCrawlingSettings = (props: RealCrawlingSettingsProps) => {

const isShopify = createMemo(() => options.type === "shopify");
const isOpenAPI = createMemo(() => options.type === "openapi");
const isYoutube = createMemo(() => options.type === "youtube");

const submit = (curOptions: FlatCrawlOptions) => {
const validateResult = validateFlatCrawlOptions(curOptions);
Expand Down Expand Up @@ -308,7 +330,7 @@ const RealCrawlingSettings = (props: RealCrawlingSettingsProps) => {
</div>
</div>

<div class="flex items-center gap-3 py-2 pt-4">
<div class="flex items-center gap-2 py-2 pt-4">
<div class="flex items-center gap-2">
<label class="block">Boost Titles</label>
<Tooltip
Expand Down Expand Up @@ -389,6 +411,38 @@ const RealCrawlingSettings = (props: RealCrawlingSettingsProps) => {
type="checkbox"
/>
</div>
<div class="flex items-center gap-2 pl-4">
  <label class="block">Youtube Channel?</label>
  <Tooltip
    tooltipText="Check this if the url is to a youtube channel"
    body={<FaRegularCircleQuestion class="h-3 w-3 text-black" />}
  />
  <input
    onChange={(e) =>
      setOptions((prev) => {
        // Checking the box always marks the options as a youtube crawl.
        if (e.currentTarget.checked) {
          return { ...prev, type: "youtube" };
        }
        // Unchecking clears the type only if it was "youtube"; any other
        // type is passed through unchanged in a fresh copy.
        if (prev.type === "youtube") {
          return { ...prev, type: undefined };
        }
        return { ...prev };
      })
    }
    checked={isYoutube()}
    class="h-3 w-3 rounded border border-neutral-300 bg-neutral-100 p-1 accent-magenta-400 dark:border-neutral-900 dark:bg-neutral-800"
    type="checkbox"
  />
</div>
</div>

<div class="flex items-center gap-3 py-2 pt-4">
Expand Down
Loading

0 comments on commit 88875a8

Please sign in to comment.