Skip to content

Commit

Permalink
feat(page): add page class
Browse files Browse the repository at this point in the history
  • Loading branch information
j-mendez committed Nov 29, 2023
1 parent 2f3ab87 commit 8aaac2c
Show file tree
Hide file tree
Showing 9 changed files with 693 additions and 528 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ indexmap = "2.1.0"
napi = { version = "2.14.1", default-features = false, features = ["napi4", "async", "tokio_rt"] }
napi-derive = "2.14.2"
num_cpus = "1.16.0"
spider = { version = "1.50.12", features = ["napi", "budget", "cron", "regex", "cookies", "socks"] }
spider = { version = "1.50.14", features = ["napi", "budget", "cron", "regex", "cookies", "socks"] }

[target.x86_64-unknown-linux-gnu.dependencies]
openssl-sys = { version = "0.9.96", features = ["vendored"] }
Expand Down
13 changes: 12 additions & 1 deletion __test__/index.spec.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import test from "ava";
import { crawl, Website, type NPage, Cron } from "../index.js";
import { crawl, Website, Page, type NPage, Cron } from "../index.js";

const TEST_URL = "https://choosealicense.com";

Expand Down Expand Up @@ -127,3 +127,14 @@ test("new website native with subscriptions", async (t) => {
// should be valid unless new pages and routes are created.
t.assert(links.length > 1, "should be more than one page");
});

test("new single page", async (t) => {
  // Calling fetch is needed to get the page content before reading it.
  const singlePage = new Page(TEST_URL);
  await singlePage.fetch();
  const pageLinks = await singlePage.getLinks();

  // should be valid unless new pages and routes are created.
  t.assert(pageLinks.length > 1, "should be more than one link");

  const markup = singlePage.getHtml();
  t.assert(markup.length >= 100, "should be valid html");

  const rawBytes = singlePage.getBytes();
  t.assert(rawBytes.length >= 100, "should be valid bytes");
});
3 changes: 2 additions & 1 deletion book/src/SUMMARY.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@
- [Getting started](./getting-started.md)
- [A simple example](./simple.md)

# Config
# Configuration

- [Website](./website.md)
- [Page](./page.md)

# Usage

Expand Down
54 changes: 54 additions & 0 deletions book/src/page.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Page

A single page on a website, useful if you just need one page or the root url.

## New Page

Get a new page with content.

The first param is the url, followed by whether subdomains should be included, and lastly whether to include TLDs in links.

Calling `page.fetch` is needed to get the content.

```ts
import { Page } from "@spider-rs/spider-rs";

const page = new Page("https://choosealicense.com", false, false);
await page.fetch();
```

## Page Links

Get all the links related to a page.

```ts

const page = new Page("https://choosealicense.com", false, false);
await page.fetch();
const links = await page.getLinks();
console.log(links);
```

## Page Html

Get the markup for the page or HTML.

```ts

const page = new Page("https://choosealicense.com", false, false);
await page.fetch();
const html = page.getHtml();
console.log(html);
```

## Page Bytes

Get the raw bytes of a page to store the files in a database.

```ts

const page = new Page("https://choosealicense.com", false, false);
await page.fetch();
const bytes = page.getBytes();
console.log(bytes);
```
13 changes: 13 additions & 0 deletions index.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,19 @@ export interface NPage {
}
/** crawl a website gathering all links to array */
export function crawl(url: string): Promise<NWebsite>
/**
 * A simple page object: a single page on a website.
 * Calling `fetch` is needed to get the content.
 */
export class Page {
/**
 * Create a new page.
 * @param url - the url of the page.
 * @param subdomains - whether subdomains should be included.
 * @param tld - whether TLDs should be included in links.
 */
constructor(url: string, subdomains?: boolean | undefined | null, tld?: boolean | undefined | null)
/**
 * Get the page content.
 * @returns a promise that resolves to this page.
 */
fetch(): Promise<this>
/** All links on the page. */
getLinks(): Promise<Array<string>>
/** Get the html markup for the page. */
getHtml(): string
/** Get the raw bytes for the page, e.g. to store the files in a database. */
getBytes(): any
}
/** website main data from rust to node */
export class NWebsite {
/** all of the website links. */
Expand Down
3 changes: 2 additions & 1 deletion index.js
Original file line number Diff line number Diff line change
Expand Up @@ -252,8 +252,9 @@ if (!nativeBinding) {
throw new Error(`Failed to load native binding`)
}

const { NWebsite, crawl, Website, Cron } = nativeBinding
const { Page, NWebsite, crawl, Website, Cron } = nativeBinding

module.exports.Page = Page
module.exports.NWebsite = NWebsite
module.exports.crawl = crawl
module.exports.Website = Website
Expand Down
Loading

0 comments on commit 8aaac2c

Please sign in to comment.