Skip to content

Commit

Permalink
fix: fix amazon scrapping (#92)
Browse files Browse the repository at this point in the history
* fix: fix amazon scrapping

* fix: fix URL pattern matching
  • Loading branch information
ricardofelgueiras authored May 13, 2024
1 parent 09f26c5 commit a47b7d9
Show file tree
Hide file tree
Showing 9 changed files with 13 additions and 15 deletions.
2 changes: 1 addition & 1 deletion src/scrappers/amazon.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-url: 'https://www.amazon.*/s?k=*'
+url: 'https://www.amazon.*'
header: Amazon search results
listElementsQuery: '[class*="sg-"][data-cel-widget*="search_result_"]'
elementParser:
Expand Down
2 changes: 1 addition & 1 deletion src/scrappers/g2-search.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-url: https://www.g2.com/search
+url: https://www.g2.com/search*
header: 'G2 search results'
listElementsQuery: '[class*="paper mb-1"]'
elementParser:
Expand Down
2 changes: 1 addition & 1 deletion src/scrappers/linkedin-groups.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-url: https://www.linkedin.com/groups/
+url: https://www.linkedin.com/groups/*
header: Linkedin groups
listElementsQuery: .scaffold-finite-scroll__content > div > ul > li
elementParser:
Expand Down
2 changes: 1 addition & 1 deletion src/scrappers/linkedin-jobs.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-url: https://www.linkedin.com/jobs/search/
+url: https://www.linkedin.com/jobs/search/*
header: Linkedin jobs search results
listElementsQuery: .jobs-search-results__list-item
elementParser:
Expand Down
2 changes: 1 addition & 1 deletion src/scrappers/tik-tok-accounts.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-url: https://www.tiktok.com/search/user
+url: https://www.tiktok.com/search/user*
header: TikTok Search Results
listElementsQuery: '[class*="DivPanelContainer"] > [class*="-DivLink"]'
elementParser:
Expand Down
2 changes: 1 addition & 1 deletion src/scrappers/tik-tok-video.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-url: https://www.tiktok.com/search/video
+url: https://www.tiktok.com/search/video*
header: TikTok Search Results
listElementsQuery: '[class*="-DivItemContainerForSearch"]'
elementParser:
Expand Down
2 changes: 1 addition & 1 deletion src/scrappers/yellow-pages.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-url: https://www.yellowpages.com/search
+url: https://www.yellowpages.com/search*
listElementsQuery: .result
elementParser:
- title: Logo
Expand Down
6 changes: 3 additions & 3 deletions src/scrappers/youtube.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
url:
-- https://www.youtube.com/feed/history
-- https://www.youtube.com/results
-- https://www.youtube.com/playlist
+- https://www.youtube.com/feed/history*
+- https://www.youtube.com/results*
+- https://www.youtube.com/playlist*
listElementsQuery: 'ytd-video-renderer, ytd-playlist-video-renderer, ytd-rich-item-renderer'
elementParser:
- title: Video thumbnail
Expand Down
8 changes: 3 additions & 5 deletions src/utils/urlUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@ export function urlMatchesPatternUrl(url: string, patternURL: string): boolean {
if (!patternURL) {
return false;
}
-// eslint-disable-next-line @typescript-eslint/ban-ts-comment
-// @ts-ignore
-const pattern = new URLPattern(patternURL);

-return pattern.test(url);
-}
+const regex = new RegExp('^' + patternURL.replace(/\*/g, '.*') + '$');
+return regex.test(url);
+}

0 comments on commit a47b7d9

Please sign in to comment.