-
-
Notifications
You must be signed in to change notification settings - Fork 168
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
129 additions
and
34 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,39 +1,102 @@ | ||
declare module 'metascraper' { | ||
export default function MetaParser(rules: RuleSet[]): Scraper; | ||
|
||
type Scraper = (options: ScrapOptions) => Promise<Metadata>; | ||
|
||
interface ScrapOptions { | ||
url: string; | ||
html?: string; | ||
rules?: RuleSet[]; | ||
validateUrl?: boolean; | ||
} | ||
export interface MetascraperOptions { | ||
/** | ||
* The URL associated with the HTML markup. | ||
* It is used to resolve relative links that can be present in the HTML markup. | ||
* It can be used as a fallback field for different rules as well. | ||
* | ||
*/ | ||
url: string; | ||
/** | ||
* The HTML markup for extracting the content. | ||
*/ | ||
html?: string; | ||
/** | ||
* You can pass additional rules to add at execution time. | ||
* These rules will be merged with your loaded rules at the beginning. | ||
*/ | ||
rules?: Rules[]; | ||
/** | ||
* Ensure the provided URL is validated as WHATWG URL API compliant. | ||
*/ | ||
validateUrl?: boolean; | ||
} | ||
|
||
interface Metadata { | ||
author: string; | ||
date: string; | ||
description: string; | ||
image: string; | ||
publisher: string; | ||
title: string; | ||
url: string; | ||
} | ||
export interface Metadata { | ||
/** | ||
* Get audio property from HTML markup. | ||
* The package [metascraper-audio](https://example.com/metascraper-audio) needs to be loaded. | ||
*/ | ||
audio?: string; | ||
/** | ||
* Get author property from HTML markup. | ||
* The package [metascraper-author](https://example.com/metascraper-author) needs to be loaded. | ||
*/ | ||
author?: string; | ||
/** | ||
* Get date property from HTML markup. | ||
* The package [metascraper-date](https://example.com/metascraper-date) needs to be loaded. | ||
*/ | ||
date?: string; | ||
/** | ||
* Get description property from HTML markup. | ||
* The package [metascraper-description](https://example.com/metascraper-description) needs to be loaded. | ||
*/ | ||
description?: string; | ||
/** | ||
* Get image property from HTML markup. | ||
* The package [metascraper-image](https://example.com/metascraper-image) needs to be loaded. | ||
*/ | ||
image?: string; | ||
/** | ||
* Get lang property from HTML markup. | ||
* The package [metascraper-lang](https://example.com/metascraper-lang) needs to be loaded. | ||
*/ | ||
lang?: string; | ||
/** | ||
* Get logo property from HTML markup. | ||
* The package [metascraper-logo](https://example.com/metascraper-logo) needs to be loaded. | ||
*/ | ||
logo?: string; | ||
/** | ||
* Get publisher property from HTML markup. | ||
* The package [metascraper-publisher](https://example.com/metascraper-publisher) needs to be loaded. | ||
*/ | ||
publisher?: string; | ||
/** | ||
* Get title property from HTML markup. | ||
* The package [metascraper-title](https://example.com/metascraper-title) needs to be loaded. | ||
*/ | ||
title?: string; | ||
/** | ||
* Get url property from HTML markup. | ||
* The package [metascraper-url](https://example.com/metascraper-url) needs to be loaded. | ||
*/ | ||
url?: string; | ||
/** | ||
* Get video property from HTML markup. | ||
* The package [metascraper-video](https://example.com/metascraper-video) needs to be loaded. | ||
*/ | ||
video?: string; | ||
[key: string]: string | undefined; | ||
} | ||
|
||
type RuleSet = { | ||
[C in keyof Metadata]?: Array<Check>; | ||
} & { | ||
test?: (options: CheckOptions) => boolean; | ||
} | ||
export type Rules = { | ||
[C in keyof Metadata]?: Array<RulesOptions> | RulesOptions; | ||
} & { | ||
test?: (options: RulesTestOptions) => boolean; | ||
}; | ||
|
||
type Check = (options: CheckOptions) => string | null | undefined; | ||
export type RulesOptions = (options: RulesTestOptions) => string | null | undefined; | ||
|
||
interface CheckOptions { | ||
htmlDom: import('cheerio').CheerioAPI; | ||
url: string | ||
} | ||
export interface RulesTestOptions { | ||
htmlDom: import('cheerio').CheerioAPI; | ||
url: string; | ||
} | ||
|
||
declare module 'metascraper-*' { | ||
export default function rules(): import('metascraper').RuleSet; | ||
} | ||
export type Metascraper = (options: MetascraperOptions) => Promise<Metadata>; | ||
|
||
/** | ||
* Creates a [metascraper](https://metascraper.js.org/) instance, explicitly declaring the rules bundles to be used. | ||
* @param rules - The [rules bundles](https://metascraper.js.org/#/?id=rules-bundles) to be applied for metadata extraction. | ||
*/ | ||
export default function createMetascraper(rules: Rules[]): Metascraper; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
import createMetascraper from '../../src' | ||
|
||
/* basic */ | ||
|
||
createMetascraper([]) | ||
|
||
createMetascraper([ | ||
require('metascraper-author')(), | ||
require('metascraper-url')() | ||
]) | ||
|
||
/* methods */ | ||
|
||
const metascraper = createMetascraper([ | ||
require('metascraper-author')(), | ||
require('metascraper-url')() | ||
]) | ||
|
||
const payload = await metascraper({ | ||
url: 'https://example.com', | ||
html: '', | ||
validateUrl: false | ||
}) | ||
|
||
console.log(payload.author) |