Commit 3c9770a

feat: add rendering to the sdk
1 parent 441d08d commit 3c9770a

8 files changed: +153 -19 lines changed

8 files changed

+153
-19
lines changed

scrapegraph-js/examples/utilities/scrape_example.js

Lines changed: 17 additions & 4 deletions
@@ -33,17 +33,19 @@ const OUTPUT_DIR = 'scrape_output';
  * @returns {Object} The API response containing HTML content and metadata
  */
 async function scrapeWebsite(websiteUrl, options = {}) {
-  const { renderHeavyJs = false, headers = {} } = options;
+  const { renderHeavyJs = false, branding = false, headers = {} } = options;
 
   const jsMode = renderHeavyJs ? 'with heavy JS rendering' : 'without JS rendering';
+  const brandingMode = branding ? 'with branding' : 'without branding';
   console.log(`Getting HTML content from: ${websiteUrl}`);
-  console.log(`Mode: ${jsMode}`);
+  console.log(`Mode: ${jsMode}, ${brandingMode}`);
 
   const startTime = Date.now();
 
   try {
     const result = await scrape(API_KEY, websiteUrl, {
       renderHeavyJs,
+      branding,
       headers
     });
 
@@ -118,12 +120,21 @@ async function main() {
       url: 'https://example.com',
       name: 'example',
       renderHeavyJs: false,
+      branding: false,
       description: 'Simple static website',
     },
+    {
+      url: 'https://www.cubic.dev/',
+      name: 'cubic_dev',
+      renderHeavyJs: false,
+      branding: true,
+      description: 'Website with branding enabled',
+    },
     {
       url: 'https://httpbin.org/html',
       name: 'httpbin_html',
       renderHeavyJs: false,
+      branding: false,
       description: 'HTTP testing service',
     },
   ];
@@ -147,7 +158,8 @@ async function main() {
     try {
       // Get HTML content
       const result = await scrapeWebsite(website.url, {
-        renderHeavyJs: website.renderHeavyJs
+        renderHeavyJs: website.renderHeavyJs,
+        branding: website.branding
       });
 
       // Display response metadata
@@ -174,7 +186,8 @@ async function main() {
       console.log(`  Link tags: ${stats.linkTags}`);
 
       // Save HTML content
-      const filename = `${website.name}_${website.renderHeavyJs ? 'js' : 'nojs'}`;
+      const brandingSuffix = website.branding ? '_branding' : '';
+      const filename = `${website.name}_${website.renderHeavyJs ? 'js' : 'nojs'}${brandingSuffix}`;
       await saveHtmlContent(htmlContent, filename);
 
       // Show first 500 characters as preview

scrapegraph-js/package.json

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 {
   "name": "scrapegraph-js",
   "author": "ScrapeGraphAI",
-  "version": "0.2.4",
+  "version": "0.2.5",
   "description": "Scrape and extract structured data from a webpage using ScrapeGraphAI's APIs. Supports cookies for authentication, infinite scrolling, and pagination.",
   "repository": {
     "type": "git",

scrapegraph-js/src/scrape.js

Lines changed: 12 additions & 0 deletions
@@ -10,6 +10,7 @@ import { getMockResponse, createMockAxiosResponse } from './utils/mockResponse.j
  * @param {string} url - The URL of the webpage to be converted.
  * @param {Object} options - Optional configuration options.
  * @param {boolean} options.renderHeavyJs - Whether to render heavy JavaScript (defaults to false).
+ * @param {boolean} [options.branding=false] - Whether to include branding in the response (defaults to false).
  * @param {Object} options.headers - Optional custom headers to send with the request.
  * @param {boolean} [options.stealth=false] - Enable stealth mode to avoid bot detection
  * @returns {Promise<Object>} A promise that resolves to the HTML content and metadata.
@@ -35,6 +36,12 @@ import { getMockResponse, createMockAxiosResponse } from './utils/mockResponse.j
  * });
  *
  * @example
+ * // With branding enabled:
+ * const result = await scrape(apiKey, url, {
+ *   branding: true
+ * });
+ *
+ * @example
  * // With custom headers:
  * const result = await scrape(apiKey, url, {
  *   renderHeavyJs: false,
@@ -47,6 +54,7 @@ import { getMockResponse, createMockAxiosResponse } from './utils/mockResponse.j
 export async function scrape(apiKey, url, options = {}) {
   const {
     renderHeavyJs = false,
+    branding = false,
     headers: customHeaders = {},
     mock = null,
     stealth = false
@@ -75,6 +83,10 @@ export async function scrape(apiKey, url, options = {}) {
     render_heavy_js: renderHeavyJs,
   };
 
+  if (branding) {
+    payload.branding = branding;
+  }
+
   if (stealth) {
     payload.stealth = stealth;
   }
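
For quick reference, below is a minimal caller-side sketch of the new option. The call shape — scrape(apiKey, url, { renderHeavyJs, branding, headers }) — comes from the diff above; the root import path, the environment-variable name, and reading the HTML off result.html are illustrative assumptions rather than confirmed API details.

// Minimal usage sketch of the new branding flag (assumptions noted above).
import { scrape } from 'scrapegraph-js'; // assumed root re-export of src/scrape.js

const apiKey = process.env.SGAI_APIKEY; // assumed env var, mirroring the SGAI-APIKEY request header

const result = await scrape(apiKey, 'https://www.cubic.dev/', {
  renderHeavyJs: false, // static page, no heavy JS rendering needed
  branding: true        // new in this commit; added to the request payload only when truthy
});

console.log(`Received ${result.html?.length ?? 0} characters of HTML`); // response shape assumed to match the Python examples

Because src/scrape.js only sets payload.branding when the flag is truthy, callers that never pass branding send exactly the same request as before.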

scrapegraph-py/examples/scrape/async/async_scrape_example.py

Lines changed: 51 additions & 7 deletions
@@ -5,15 +5,17 @@
 1. How to make async scrape requests
 2. How to process multiple URLs concurrently
 3. How to use render_heavy_js for JavaScript-heavy websites
-4. How to add custom headers in async mode
+4. How to use branding parameter
+5. How to add custom headers in async mode
 
 Equivalent curl command:
 curl -X POST https://api.scrapegraphai.com/v1/scrape \
   -H "Content-Type: application/json" \
   -H "SGAI-APIKEY: your-api-key-here" \
   -d '{
-    "website_url": "https://example.com",
-    "render_heavy_js": false
+    "website_url": "https://www.cubic.dev/",
+    "render_heavy_js": false,
+    "branding": true
   }'
 
 Requirements:
@@ -165,6 +167,31 @@ async def concurrent_scraping_example():
     return results
 
 
+async def async_scrape_with_branding():
+    """Demonstrate async scraping with branding enabled."""
+    print("\n🏷️ Async Branding Example")
+    print("=" * 30)
+
+    async with AsyncClient.from_env() as client:
+        try:
+            print("Making async scrape request with branding enabled...")
+            result = await client.scrape(
+                website_url="https://www.cubic.dev/",
+                render_heavy_js=False,
+                branding=True
+            )
+
+            html_content = result.get("html", "")
+            print(f"✅ Success! Received {len(html_content):,} characters of HTML")
+            print(f"Request ID: {result.get('request_id', 'N/A')}")
+
+            return result
+
+        except Exception as e:
+            print(f"❌ Error: {str(e)}")
+            return None
+
+
 async def async_scrape_with_custom_headers():
     """Demonstrate async scraping with custom headers."""
     print("\n🔧 Async Custom Headers Example")
@@ -228,7 +255,17 @@ def demonstrate_curl_equivalent():
     print("        \"render_heavy_js\": false")
     print("      }'")
 
-    print("\n2. Multiple concurrent requests:")
+    print("\n2. With branding enabled:")
+    print("curl -X POST https://api.scrapegraphai.com/v1/scrape \\")
+    print("  -H \"Content-Type: application/json\" \\")
+    print("  -H \"SGAI-APIKEY: your-api-key-here\" \\")
+    print("  -d '{")
+    print("        \"website_url\": \"https://www.cubic.dev/\",")
+    print("        \"render_heavy_js\": false,")
+    print("        \"branding\": true")
+    print("      }'")
+
+    print("\n3. Multiple concurrent requests:")
     print("# Run multiple curl commands in parallel:")
     print("curl -X POST https://api.scrapegraphai.com/v1/scrape \\")
     print("  -H \"Content-Type: application/json\" \\")
@@ -253,7 +290,8 @@ async def main():
         # Run async examples
         result1 = await basic_async_scrape()
         result2 = await async_scrape_with_heavy_js()
-        result3 = await async_scrape_with_custom_headers()
+        result3 = await async_scrape_with_branding()
+        result4 = await async_scrape_with_custom_headers()
         concurrent_results = await concurrent_scraping_example()
 
         # Save results if successful
@@ -265,12 +303,18 @@
         if result3:
             html3 = result3.get("html", "")
             if html3:
-                await save_html_to_file_async(html3, "custom_headers_async_scrape")
+                await save_html_to_file_async(html3, "branding_async_scrape")
+
+        if result4:
+            html4 = result4.get("html", "")
+            if html4:
+                await save_html_to_file_async(html4, "custom_headers_async_scrape")
 
         print("\n🎯 Summary:")
         print(f"✅ Basic async scrape: {'Success' if result1 else 'Failed'}")
         print(f"✅ Heavy JS async scrape: {'Success' if result2 else 'Failed'}")
-        print(f"✅ Custom headers async scrape: {'Success' if result3 else 'Failed'}")
+        print(f"✅ Branding async scrape: {'Success' if result3 else 'Failed'}")
+        print(f"✅ Custom headers async scrape: {'Success' if result4 else 'Failed'}")
         print(f"✅ Concurrent scraping: {'Success' if concurrent_results else 'Failed'}")
 
     except Exception as e:

scrapegraph-py/examples/scrape/sync/scrape_example.py

Lines changed: 57 additions & 6 deletions
@@ -4,16 +4,18 @@
 This example shows:
 1. How to make a basic scrape request
 2. How to use render_heavy_js for JavaScript-heavy websites
-3. How to add custom headers
-4. How to handle the response
+3. How to use branding parameter
+4. How to add custom headers
+5. How to handle the response
 
 Equivalent curl command:
 curl -X POST https://api.scrapegraphai.com/v1/scrape \
   -H "Content-Type: application/json" \
   -H "SGAI-APIKEY: your-api-key-here" \
   -d '{
     "website_url": "https://example.com",
-    "render_heavy_js": false
+    "render_heavy_js": false,
+    "branding": true
   }'
 
 Requirements:
@@ -98,6 +100,38 @@ def scrape_with_heavy_js():
         client.close()
 
 
+def scrape_with_branding():
+    """Demonstrate scraping with branding enabled."""
+    print("\n🏷️ Branding Example")
+    print("=" * 30)
+
+    client = Client.from_env()
+
+    try:
+        print("Making scrape request with branding enabled...")
+        result = client.scrape(
+            website_url="https://www.cubic.dev/",
+            render_heavy_js=False,
+            branding=True
+        )
+
+        html_content = result.get("html", "")
+        print(f"✅ Success! Received {len(html_content):,} characters of HTML")
+        print(f"Request ID: {result.get('request_id', 'N/A')}")
+
+        # Show a preview of the HTML
+        preview = html_content[:200].replace('\n', ' ').strip()
+        print(f"HTML Preview: {preview}...")
+
+        return result
+
+    except Exception as e:
+        print(f"❌ Error: {str(e)}")
+        return None
+    finally:
+        client.close()
+
+
 def scrape_with_custom_headers():
     """Demonstrate scraping with custom headers."""
     print("\n🔧 Custom Headers Example")
@@ -175,6 +209,16 @@ def demonstrate_curl_equivalent():
     print("        \"website_url\": \"https://example.com\",")
     print("        \"render_heavy_js\": true")
     print("      }'")
+
+    print("\n3. With branding enabled:")
+    print("curl -X POST https://api.scrapegraphai.com/v1/scrape \\")
+    print("  -H \"Content-Type: application/json\" \\")
+    print("  -H \"SGAI-APIKEY: your-api-key-here\" \\")
+    print("  -d '{")
+    print("        \"website_url\": \"https://www.cubic.dev/\",")
+    print("        \"render_heavy_js\": false,")
+    print("        \"branding\": true")
+    print("      }'")
 
 
 def main():
@@ -189,7 +233,8 @@ def main():
         # Run examples
         result1 = basic_scrape_example()
         result2 = scrape_with_heavy_js()
-        result3 = scrape_with_custom_headers()
+        result3 = scrape_with_branding()
+        result4 = scrape_with_custom_headers()
 
         # Save results if successful
         if result1:
@@ -200,12 +245,18 @@
         if result3:
            html3 = result3.get("html", "")
            if html3:
-                save_html_to_file(html3, "custom_headers_scrape")
+                save_html_to_file(html3, "branding_scrape")
+
+        if result4:
+            html4 = result4.get("html", "")
+            if html4:
+                save_html_to_file(html4, "custom_headers_scrape")
 
         print("\n🎯 Summary:")
         print(f"✅ Basic scrape: {'Success' if result1 else 'Failed'}")
         print(f"✅ Heavy JS scrape: {'Success' if result2 else 'Failed'}")
-        print(f"✅ Custom headers scrape: {'Success' if result3 else 'Failed'}")
+        print(f"✅ Branding scrape: {'Success' if result3 else 'Failed'}")
+        print(f"✅ Custom headers scrape: {'Success' if result4 else 'Failed'}")
 
     except Exception as e:
         print(f"❌ Unexpected error: {str(e)}")

scrapegraph-py/scrapegraph_py/async_client.py

Lines changed: 4 additions & 0 deletions
@@ -481,6 +481,7 @@ async def scrape(
         self,
         website_url: str,
         render_heavy_js: bool = False,
+        branding: bool = False,
         headers: Optional[dict[str, str]] = None,
         stealth: bool = False,
     ):
@@ -489,11 +490,13 @@ async def scrape(
         Args:
             website_url: The URL of the website to get HTML from
             render_heavy_js: Whether to render heavy JavaScript (defaults to False)
+            branding: Whether to include branding in the response (defaults to False)
             headers: Optional headers to send with the request
             stealth: Enable stealth mode to avoid bot detection
         """
         logger.info(f"🔍 Starting scrape request for {website_url}")
         logger.debug(f"🔧 Render heavy JS: {render_heavy_js}")
+        logger.debug(f"🔧 Branding: {branding}")
         if headers:
             logger.debug("🔧 Using custom headers")
         if stealth:
@@ -502,6 +505,7 @@ async def scrape(
         request = ScrapeRequest(
             website_url=website_url,
             render_heavy_js=render_heavy_js,
+            branding=branding,
             headers=headers,
             stealth=stealth,
         )

scrapegraph-py/scrapegraph_py/client.py

Lines changed: 4 additions & 0 deletions
@@ -491,6 +491,7 @@ def scrape(
         self,
         website_url: str,
         render_heavy_js: bool = False,
+        branding: bool = False,
         headers: Optional[dict[str, str]] = None,
         mock:bool=False,
         stealth:bool=False,
@@ -500,11 +501,13 @@ def scrape(
         Args:
             website_url: The URL of the website to get HTML from
             render_heavy_js: Whether to render heavy JavaScript (defaults to False)
+            branding: Whether to include branding in the response (defaults to False)
             headers: Optional headers to send with the request
             stealth: Enable stealth mode to avoid bot detection
         """
         logger.info(f"🔍 Starting scrape request for {website_url}")
         logger.debug(f"🔧 Render heavy JS: {render_heavy_js}")
+        logger.debug(f"🔧 Branding: {branding}")
         if headers:
             logger.debug("🔧 Using custom headers")
         if stealth:
@@ -513,6 +516,7 @@ def scrape(
         request = ScrapeRequest(
             website_url=website_url,
             render_heavy_js=render_heavy_js,
+            branding=branding,
             headers=headers,
             mock=mock,
             stealth=stealth
