From c3b9b4fd3aee3f8b802ee2b92579a8bcb806bfcc Mon Sep 17 00:00:00 2001
From: Kiko Beats
Date: Thu, 7 Dec 2023 11:58:56 +0100
Subject: [PATCH] feat: better type definitions (#673)

---
 packages/metascraper/package.json             |   9 +-
 packages/metascraper/src/index.d.ts           | 129 +++++++++++++-----
 .../metascraper/test/types/index.test-d.ts    |  25 ++++
 3 files changed, 129 insertions(+), 34 deletions(-)
 create mode 100644 packages/metascraper/test/types/index.test-d.ts

diff --git a/packages/metascraper/package.json b/packages/metascraper/package.json
index 0945a58e5..cc036819d 100644
--- a/packages/metascraper/package.json
+++ b/packages/metascraper/package.json
@@ -4,6 +4,7 @@
   "homepage": "https://metascraper.js.org",
   "version": "5.40.0",
   "main": "src/index.js",
+  "types": "src/index.d.ts",
   "repository": {
     "type": "git",
     "url": "git+https://github.com/microlinkhq/metascraper.git"
@@ -63,7 +64,8 @@
     "whoops": "~4.1.4"
   },
   "devDependencies": {
-    "ava": "5"
+    "ava": "5",
+    "tsd": "latest"
   },
   "engines": {
     "node": ">= 16"
@@ -72,11 +74,16 @@
     "src"
   ],
   "scripts": {
+    "lint": "tsd",
+    "pretest": "npm run lint",
     "test": "NODE_PATH=.. TZ=UTC ava --timeout 15s"
   },
   "license": "MIT",
   "ava": {
     "failFast": true,
     "timeout": "5m"
+  },
+  "tsd": {
+    "directory": "test/types"
   }
 }
diff --git a/packages/metascraper/src/index.d.ts b/packages/metascraper/src/index.d.ts
index 06d23e607..dd22f94c5 100644
--- a/packages/metascraper/src/index.d.ts
+++ b/packages/metascraper/src/index.d.ts
@@ -1,39 +1,102 @@
-declare module 'metascraper' {
-  export default function MetaParser(rules: RuleSet[]): Scraper;
-
-  type Scraper = (options: ScrapOptions) => Promise<Metadata>;
-
-  interface ScrapOptions {
-    url: string;
-    html?: string;
-    rules?: RuleSet[];
-    validateUrl?: boolean;
-  }
+export interface MetascraperOptions {
+  /**
+   * The URL associated with the HTML markup.
+   * It is used to resolve relative links that can be present in the HTML markup.
+   * It can also be used as a fallback field for different rules.
+   *
+   */
+  url: string;
+  /**
+   * The HTML markup for extracting the content.
+   */
+  html?: string;
+  /**
+   * You can pass additional rules to add at execution time.
+   * These rules will be merged with your loaded rules at the beginning.
+   */
+  rules?: Rules[];
+  /**
+   * Ensure the provided URL is validated as WHATWG URL API compliant.
+   */
+  validateUrl?: boolean;
+}
 
-  interface Metadata {
-    author: string;
-    date: string;
-    description: string;
-    image: string;
-    publisher: string;
-    title: string;
-    url: string;
-  }
+export interface Metadata {
+  /**
+   * Get audio property from HTML markup.
+   * The package [metascraper-audio](https://example.com/metascraper-audio) needs to be loaded.
+   */
+  audio?: string;
+  /**
+   * Get author property from HTML markup.
+   * The package [metascraper-author](https://example.com/metascraper-author) needs to be loaded.
+   */
+  author?: string;
+  /**
+   * Get date property from HTML markup.
+   * The package [metascraper-date](https://example.com/metascraper-date) needs to be loaded.
+   */
+  date?: string;
+  /**
+   * Get description property from HTML markup.
+   * The package [metascraper-description](https://example.com/metascraper-description) needs to be loaded.
+   */
+  description?: string;
+  /**
+   * Get image property from HTML markup.
+   * The package [metascraper-image](https://example.com/metascraper-image) needs to be loaded.
+   */
+  image?: string;
+  /**
+   * Get lang property from HTML markup.
+   * The package [metascraper-lang](https://example.com/metascraper-lang) needs to be loaded.
+   */
+  lang?: string;
+  /**
+   * Get logo property from HTML markup.
+   * The package [metascraper-logo](https://example.com/metascraper-logo) needs to be loaded.
+   */
+  logo?: string;
+  /**
+   * Get publisher property from HTML markup.
+   * The package [metascraper-publisher](https://example.com/metascraper-publisher) needs to be loaded.
+   */
+  publisher?: string;
+  /**
+   * Get title property from HTML markup.
+   * The package [metascraper-title](https://example.com/metascraper-title) needs to be loaded.
+   */
+  title?: string;
+  /**
+   * Get url property from HTML markup.
+   * The package [metascraper-url](https://example.com/metascraper-url) needs to be loaded.
+   */
+  url?: string;
+  /**
+   * Get video property from HTML markup.
+   * The package [metascraper-video](https://example.com/metascraper-video) needs to be loaded.
+   */
+  video?: string;
+  [key: string]: string | undefined;
+}
 
-  type RuleSet = {
-    [C in keyof Metadata]?: Array<Check>;
-  } & {
-    test?: (options: CheckOptions) => boolean;
-  }
+export type Rules = {
+  [C in keyof Metadata]?: Array<RulesOptions> | RulesOptions;
+} & {
+  test?: (options: RulesTestOptions) => boolean;
+};
 
-  type Check = (options: CheckOptions) => string | null | undefined;
+export type RulesOptions = (options: RulesTestOptions) => string | null | undefined;
 
-  interface CheckOptions {
-    htmlDom: import('cheerio').CheerioAPI;
-    url: string
-  }
+export interface RulesTestOptions {
+  htmlDom: import('cheerio').CheerioAPI;
+  url: string;
 }
 
-declare module 'metascraper-*' {
-  export default function rules(): import('metascraper').RuleSet;
-}
+export type Metascraper = (options: MetascraperOptions) => Promise<Metadata>;
+
+/**
+ * Creates a [metascraper](https://metascraper.js.org/) instance, explicitly declaring the rules bundles to be used.
+ * @param rules - The [rules bundles](https://metascraper.js.org/#/?id=rules-bundles) to be applied for metadata extraction.
+ */
+export default function createMetascraper(rules: Rules[]): Metascraper;
diff --git a/packages/metascraper/test/types/index.test-d.ts b/packages/metascraper/test/types/index.test-d.ts
new file mode 100644
index 000000000..aa7c1e16f
--- /dev/null
+++ b/packages/metascraper/test/types/index.test-d.ts
@@ -0,0 +1,25 @@
+import createMetascraper from '../../src'
+
+/* basic */
+
+createMetascraper([])
+
+createMetascraper([
+  require('metascraper-author')(),
+  require('metascraper-url')()
+])
+
+/* methods */
+
+const metascraper = createMetascraper([
+  require('metascraper-author')(),
+  require('metascraper-url')()
+])
+
+const payload = await metascraper({
+  url: 'https://example.com',
+  html: '',
+  validateUrl: false
+})
+
+console.log(payload.author)