chore(lib): add website class
j-mendez committed Nov 27, 2023
1 parent 9dd7620 commit 2043c6e
Showing 5 changed files with 120 additions and 22 deletions.
35 changes: 23 additions & 12 deletions README.md
@@ -7,24 +7,35 @@ The [spider](https://github.com/spider-rs/spider-rs) project ported to nodejs via napi.
 1. `npm i @spider-rs/spider-rs --save`
 
 ```ts
-import { crawl } from '@spider-rs/spider-rs'
+import { Website } from '@spider-rs/spider-rs'
 
-// gather all the links found in a website fast concurrently.
-const { links, pages } = await crawl("https://rsseau.fr");
+const website = new Website("https://rsseau.fr");
+await website.crawl();
+console.log(website.getLinks())
 ```
 
-## Development
+Collect the resource.
 
-Install the napi cli `npm i @napi-rs/cli --global`.
+```ts
+import { Website } from '@spider-rs/spider-rs'
 
-1. `yarn build:test`
+const website = new Website("https://rsseau.fr");
+await website.scrape();
+console.log(website.getPages())
+```
 
-### TODO: Full Spider Port
+Use the crawl shortcut to get the page content and url.
 
-Port the modules to be used via nodejs to adhere to the spider interface.
+```ts
+import { crawl } from '@spider-rs/spider-rs'
 
-A full port would require FromNapi support on the following modules.
+const { links, pages } = await crawl("https://rsseau.fr");
+console.log(pages)
+```
 
+
+## Development
+
+Install the napi cli `npm i @napi-rs/cli --global`.
+
-- compact_str
-- case_insensitive_str
-- small_vec
+1. `yarn build:test`
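
Per the `index.d.ts` typings below, each entry in `pages` is a `Page` carrying the fetched `url` and its raw `content`; a minimal sketch iterating the shortcut's result:

```ts
import { crawl } from '@spider-rs/spider-rs'

const { links, pages } = await crawl('https://rsseau.fr')

// every Page carries the fetched url and its raw html content
for (const page of pages) {
  console.log(page.url, page.content.length)
}
console.log(links.length, 'links found')
```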
18 changes: 16 additions & 2 deletions __test__/index.spec.ts
@@ -1,10 +1,24 @@
 import test from 'ava'
 
-import { crawl } from '../index.js'
+import { crawl, Website } from '../index.js'
 
 test('crawl native', async (t) => {
   const { links, pages } = await crawl("https://rsseau.fr");
 
   t.assert(links.length > 1, "should be more than one link")
   t.assert(pages.length > 1, "should be more than one page")
-})
+})
+
+test('new website native', async (t) => {
+  const website = new Website("https://rsseau.fr");
+  await website.crawl();
+
+  t.assert(website.getLinks().length > 1, "should be more than one link")
+})
+
+test('new website scrape native', async (t) => {
+  const website = new Website("https://rsseau.fr");
+  await website.scrape();
+
+  t.assert(website.getPages().length > 1, "should be more than one page")
+})
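
A natural follow-up (not part of this commit) would assert the shape of the scraped pages rather than only their count; a sketch in the same ava style, reusing the `test` and `Website` imports above:

```ts
test('scraped pages carry url and content', async (t) => {
  const website = new Website("https://rsseau.fr");
  await website.scrape();

  for (const page of website.getPages()) {
    t.assert(page.url.length > 0, "every page should have a url")
    t.assert(typeof page.content === 'string', "every page should expose html content")
  }
})
```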
16 changes: 14 additions & 2 deletions index.d.ts
@@ -11,10 +11,22 @@ export interface Page {
   content: string
 }
 /** crawl a website gathering all links to array */
-export function crawl(n: string): Promise<Website>
-export class Website {
+export function crawl(url: string): Promise<NWebsite>
+/** website main data from rust to node */
+export class NWebsite {
   /** all of the website links. */
   links: Array<string>
   /** the pages found */
   pages: Array<Page>
 }
+export class Website {
+  constructor(url: string)
+  /** crawl a website */
+  crawl(): Promise<void>
+  /** scrape a website */
+  scrape(): Promise<void>
+  /** get all the links of a website */
+  getLinks(): Array<string>
+  /** get all the pages of a website */
+  getPages(): Array<Page>
+}
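
Read against these declarations, `crawl()` resolves to a plain-data `NWebsite` while `Website` keeps its state on the Rust side and exposes results through methods; a short sketch of both, assuming the package's runtime exports match this file:

```ts
import { crawl, Website, type Page } from '@spider-rs/spider-rs'

// shortcut: plain data fields, no instance to hold on to
const data = await crawl('https://rsseau.fr')
const first: Page | undefined = data.pages[0]

// class: stateful instance, results read back through methods
const website = new Website('https://rsseau.fr')
await website.crawl()
const links: Array<string> = website.getLinks()
```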
5 changes: 3 additions & 2 deletions index.js
@@ -252,7 +252,8 @@ if (!nativeBinding) {
   throw new Error(`Failed to load native binding`)
 }
 
-const { Website, crawl } = nativeBinding
+const { NWebsite, crawl, Website } = nativeBinding
 
-module.exports.Website = Website
+module.exports.NWebsite = NWebsite
 module.exports.crawl = crawl
+module.exports.Website = Website
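
Since the loader re-exports the native binding this way, CommonJS consumers can destructure the same names; a minimal sketch, assuming the published package name from the README:

```ts
// CommonJS-style usage of the exports wired up above
// (NWebsite is exported too, as the shape of crawl()'s result)
const { Website, crawl } = require('@spider-rs/spider-rs')

async function main() {
  const website = new Website('https://rsseau.fr')
  await website.crawl()
  console.log(website.getLinks().length, 'links')
}

main()
```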
68 changes: 64 additions & 4 deletions src/lib.rs
@@ -14,7 +14,8 @@ pub struct Page {
 }
 
 #[napi]
-pub struct Website {
+/// website main data from rust to node
+pub struct NWebsite {
   /// all of the website links.
   pub links: Vec<String>,
   /// the pages found
@@ -23,8 +24,8 @@ pub struct Website {
 
 #[napi]
 /// crawl a website gathering all links to array
-pub async fn crawl(n: String) -> Website {
-  let mut website = spider::website::Website::new(&n);
+pub async fn crawl(url: String) -> NWebsite {
+  let mut website = spider::website::Website::new(&url);
   let mut rx2 = website
     .subscribe(16)
     .expect("sync feature should be enabled");
@@ -59,5 +60,64 @@ pub async fn crawl(n: String) -> Website {
 
   let links = pages.iter().map(|x| x.url.clone()).collect::<Vec<String>>();
 
-  Website { links, pages }
+  NWebsite { links, pages }
 }
+
+#[napi]
+pub struct Website {
+  /// the website from spider
+  inner: spider::website::Website,
+}
+
+#[napi]
+impl Website {
+  #[napi(constructor)]
+  pub fn new(url: String) -> Self {
+    Website {
+      inner: spider::website::Website::new(&url),
+    }
+  }
+  #[napi]
+  /// crawl a website
+  pub async unsafe fn crawl(&mut self) {
+    self.inner.crawl().await;
+  }
+
+  #[napi]
+  /// scrape a website
+  pub async unsafe fn scrape(&mut self) {
+    self.inner.scrape().await;
+  }
+
+  #[napi]
+  /// get all the links of a website
+  pub fn get_links(&self) -> Vec<String> {
+    let links = self
+      .inner
+      .get_links()
+      .iter()
+      .map(|x| x.as_ref().to_string())
+      .collect::<Vec<String>>();
+    links
+  }
+
+  /// get all the pages of a website
+  #[napi]
+  pub fn get_pages(&self) -> Vec<Page> {
+    let mut pages: Vec<Page> = Vec::new();
+
+    match self.inner.get_pages() {
+      Some(p) => {
+        for page in p.iter() {
+          pages.push(Page {
+            url: page.get_url().into(),
+            content: page.get_html(),
+          });
+        }
+      }
+      _ => (),
+    }
+
+    pages
+  }
+}
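
Note that `get_pages` returns an empty `Vec` whenever the inner site holds no retained pages; the README and tests pair `getPages()` with `scrape()`, which suggests only `scrape()` keeps page bodies. A sketch of the expected behavior from Node, under that assumption:

```ts
import { Website } from '@spider-rs/spider-rs'

// crawl() walks the site; under the assumption above, page
// bodies are not retained and getPages() comes back empty
const crawled = new Website('https://rsseau.fr')
await crawled.crawl()
console.log(crawled.getPages().length) // expected: 0

// scrape() stores each page, so url/content pairs come back
const scraped = new Website('https://rsseau.fr')
await scraped.scrape()
console.log(scraped.getPages().length) // expected: > 0
```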
