
Commit 8d6994f

feat: add render_heavy_js
1 parent c0cc796 commit 8d6994f

10 files changed: +171 -7 lines changed


scrapegraph-js/package-lock.json

Lines changed: 3 additions & 3 deletions
Generated file; diff not rendered by default.

scrapegraph-js/package.json

Lines changed: 1 addition & 1 deletion
@@ -1,7 +1,7 @@
 {
   "name": "scrapegraph-js",
   "author": "ScrapeGraphAI",
-  "version": "0.1.3",
+  "version": "0.1.4",
   "description": "Scrape and extract structured data from a webpage using ScrapeGraphAI's APIs. Supports cookies for authentication, infinite scrolling, and pagination.",
   "repository": {
     "type": "git",

scrapegraph-js/src/agenticScraper.js

Lines changed: 3 additions & 1 deletion
@@ -15,6 +15,7 @@ import { getMockResponse } from './utils/mockResponse.js';
  * @param {boolean} [aiExtraction=false] - Whether to use AI for data extraction from the scraped content
  * @param {Object} options - Optional configuration options
  * @param {boolean} options.mock - Override mock mode for this request
+ * @param {boolean} options.renderHeavyJs - Whether to render heavy JavaScript on the page
  * @returns {Promise<Object>} Response from the API containing request_id and initial status
  * @throws {Error} Will throw an error in case of an HTTP failure or invalid parameters.
  *
@@ -65,7 +66,7 @@ import { getMockResponse } from './utils/mockResponse.js';
  * }
  */
 export async function agenticScraper(apiKey, url, steps, useSession = true, userPrompt = null, outputSchema = null, aiExtraction = false, options = {}) {
-  const { mock = null } = options;
+  const { mock = null, renderHeavyJs = false } = options;

   // Check if mock mode is enabled
   const useMock = mock !== null ? mock : isMockEnabled();
@@ -129,6 +130,7 @@ export async function agenticScraper(apiKey, url, steps, useSession = true, user
     use_session: useSession,
     steps: steps,
     ai_extraction: aiExtraction,
+    render_heavy_js: renderHeavyJs,
   };

   // Add AI extraction parameters if enabled
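
For context, a minimal usage sketch of the new option (not part of this commit). It assumes the package's entry point re-exports agenticScraper; the API key, URL, and steps are placeholders.

import { agenticScraper } from 'scrapegraph-js';

// Placeholders for illustration only.
const apiKey = 'your-api-key';
const steps = ['Type user@example.com into the email field', 'Click the login button'];

// renderHeavyJs rides along in the options object and defaults to false.
const response = await agenticScraper(
  apiKey,
  'https://example.com/login',
  steps,
  true,   // useSession
  null,   // userPrompt
  null,   // outputSchema
  false,  // aiExtraction
  { renderHeavyJs: true }
);
console.log(response.request_id, response.status);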

scrapegraph-js/src/crawl.js

Lines changed: 3 additions & 1 deletion
@@ -21,6 +21,7 @@ import { getMockResponse } from './utils/mockResponse.js';
  * @param {boolean} [options.sitemap] - Whether to use sitemap for better page discovery
  * @param {number} [options.batchSize=1] - Batch size for processing pages (1-10)
  * @param {boolean} [options.mock] - Override mock mode for this request
+ * @param {boolean} [options.renderHeavyJs=false] - Whether to render heavy JavaScript on the page
  * @returns {Promise<Object>} The crawl job response
  * @throws {Error} Throws an error if the HTTP request fails
  */
@@ -31,7 +32,7 @@ export async function crawl(
   schema,
   options = {}
 ) {
-  const { mock = null } = options;
+  const { mock = null, renderHeavyJs = false } = options;

   // Check if mock mode is enabled
   const useMock = mock !== null ? mock : isMockEnabled();
@@ -77,6 +78,7 @@ export async function crawl(
     max_pages: maxPages,
     same_domain_only: sameDomainOnly,
     batch_size: batchSize,
+    render_heavy_js: renderHeavyJs,
   };

   try {
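
A small illustrative sketch of the options object crawl() now accepts; only keys documented in the JSDoc above are shown, and the values are placeholders.

// Options for crawl(); renderHeavyJs is the flag added by this commit.
const crawlOptions = {
  sitemap: true,        // use the sitemap for better page discovery
  batchSize: 1,         // batch size for processing pages (1-10)
  renderHeavyJs: true,  // render heavy JavaScript on crawled pages
};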

scrapegraph-js/src/searchScraper.js

Lines changed: 3 additions & 1 deletion
@@ -17,11 +17,12 @@ import { getMockResponse } from './utils/mockResponse.js';
  * @param {String} userAgent - the user agent like "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
  * @param {Object} options - Optional configuration options
  * @param {boolean} options.mock - Override mock mode for this request
+ * @param {boolean} options.renderHeavyJs - Whether to render heavy JavaScript on the page
  * @returns {Promise<string>} Extracted data in JSON format matching the provided schema
  * @throws - Will throw an error in case of an HTTP failure.
  */
 export async function searchScraper(apiKey, prompt, numResults = 3, schema = null, userAgent = null, options = {}) {
-  const { mock = null } = options;
+  const { mock = null, renderHeavyJs = false } = options;

   // Check if mock mode is enabled
   const useMock = mock !== null ? mock : isMockEnabled();
@@ -49,6 +50,7 @@ export async function searchScraper(apiKey, prompt, numResults = 3, schema = nul
   const payload = {
     user_prompt: prompt,
     num_results: numResults,
+    render_heavy_js: renderHeavyJs,
   };

   if (schema) {
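
A minimal usage sketch for searchScraper with the new option (again assuming the package's entry point re-exports the function; the API key and prompt are placeholders).

import { searchScraper } from 'scrapegraph-js';

// renderHeavyJs is passed through the options object and defaults to false.
const data = await searchScraper(
  'your-api-key',
  'Summarize the latest ScrapeGraphAI release notes',
  3,     // numResults
  null,  // schema
  null,  // userAgent
  { renderHeavyJs: true }
);
console.log(data);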

scrapegraph-py/scrapegraph_py/models/agenticscraper.py

Lines changed: 2 additions & 0 deletions
@@ -58,6 +58,8 @@ class AgenticScraperRequest(BaseModel):
         "and user agent",
     )
     mock: bool = Field(default=False, description="Whether to use mock mode for the request")
+    render_heavy_js: bool = Field(default=False, description="Whether to render heavy JavaScript on the page")
+
     @model_validator(mode="after")
     def validate_url(self) -> "AgenticScraperRequest":
         if not self.url.strip():

scrapegraph-py/scrapegraph_py/models/crawl.py

Lines changed: 1 addition & 0 deletions
@@ -56,6 +56,7 @@ class CrawlRequest(BaseModel):
         description="Optional headers to send with the request, including cookies "
         "and user agent",
     )
+    render_heavy_js: bool = Field(default=False, description="Whether to render heavy JavaScript on the page")

     @model_validator(mode="after")
     def validate_url(self) -> "CrawlRequest":

scrapegraph-py/scrapegraph_py/models/searchscraper.py

Lines changed: 1 addition & 0 deletions
@@ -27,6 +27,7 @@ class SearchScraperRequest(BaseModel):
     )
     output_schema: Optional[Type[BaseModel]] = None
     mock: bool = Field(default=False, description="Whether to use mock mode for the request")
+    render_heavy_js: bool = Field(default=False, description="Whether to render heavy JavaScript on the page")

     @model_validator(mode="after")
     def validate_user_prompt(self) -> "SearchScraperRequest":

test_js_render_heavy_js.js

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
+
(The new file contains only a single blank line.)
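
Since the JavaScript test file is added empty, here is a hypothetical sketch of what it might eventually check, assuming the entry point re-exports agenticScraper and that mock: true returns a canned response instead of calling the API.

// Hypothetical, not part of this commit: verify the new option is accepted in mock mode.
import { agenticScraper } from 'scrapegraph-js';

const response = await agenticScraper(
  'your-api-key',            // placeholder; mock mode should not hit the API
  'https://example.com',
  ['Click the first link'],
  true, null, null, false,
  { mock: true, renderHeavyJs: true }
);
console.log('agenticScraper accepted renderHeavyJs:', response !== undefined);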

test_python_render_heavy_js.py

Lines changed: 153 additions & 0 deletions
@@ -0,0 +1,153 @@
#!/usr/bin/env python3
"""
Test script to verify render_heavy_js parameter implementation in Python SDK
"""

import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), 'scrapegraph-py'))

from scrapegraph_py.models.smartscraper import SmartScraperRequest
from scrapegraph_py.models.scrape import ScrapeRequest
from scrapegraph_py.models.agenticscraper import AgenticScraperRequest
from scrapegraph_py.models.crawl import CrawlRequest
from scrapegraph_py.models.searchscraper import SearchScraperRequest

def test_smartscraper_render_heavy_js():
    """Test SmartScraperRequest with render_heavy_js parameter"""
    print("Testing SmartScraperRequest with render_heavy_js...")

    # Test with render_heavy_js=True
    request = SmartScraperRequest(
        user_prompt="Extract company info",
        website_url="https://example.com",
        render_heavy_js=True
    )

    data = request.model_dump()
    assert data['render_heavy_js'] == True, "render_heavy_js should be True"
    print("✅ SmartScraperRequest with render_heavy_js=True works")

    # Test with render_heavy_js=False (default)
    request = SmartScraperRequest(
        user_prompt="Extract company info",
        website_url="https://example.com"
    )

    data = request.model_dump()
    assert data['render_heavy_js'] == False, "render_heavy_js should default to False"
    print("✅ SmartScraperRequest with render_heavy_js=False (default) works")

def test_scrape_render_heavy_js():
    """Test ScrapeRequest with render_heavy_js parameter"""
    print("Testing ScrapeRequest with render_heavy_js...")

    # Test with render_heavy_js=True
    request = ScrapeRequest(
        website_url="https://example.com",
        render_heavy_js=True
    )

    data = request.model_dump()
    assert data['render_heavy_js'] == True, "render_heavy_js should be True"
    print("✅ ScrapeRequest with render_heavy_js=True works")

    # Test with render_heavy_js=False (default)
    request = ScrapeRequest(
        website_url="https://example.com"
    )

    data = request.model_dump()
    assert data['render_heavy_js'] == False, "render_heavy_js should default to False"
    print("✅ ScrapeRequest with render_heavy_js=False (default) works")

def test_agenticscraper_render_heavy_js():
    """Test AgenticScraperRequest with render_heavy_js parameter"""
    print("Testing AgenticScraperRequest with render_heavy_js...")

    # Test with render_heavy_js=True
    request = AgenticScraperRequest(
        url="https://example.com",
        steps=["Click button", "Fill form"],
        render_heavy_js=True
    )

    data = request.model_dump()
    assert data['render_heavy_js'] == True, "render_heavy_js should be True"
    print("✅ AgenticScraperRequest with render_heavy_js=True works")

    # Test with render_heavy_js=False (default)
    request = AgenticScraperRequest(
        url="https://example.com",
        steps=["Click button", "Fill form"]
    )

    data = request.model_dump()
    assert data['render_heavy_js'] == False, "render_heavy_js should default to False"
    print("✅ AgenticScraperRequest with render_heavy_js=False (default) works")

def test_crawl_render_heavy_js():
    """Test CrawlRequest with render_heavy_js parameter"""
    print("Testing CrawlRequest with render_heavy_js...")

    # Test with render_heavy_js=True
    request = CrawlRequest(
        url="https://example.com",
        prompt="Extract data",
        data_schema={"type": "object"},
        render_heavy_js=True
    )

    data = request.model_dump()
    assert data['render_heavy_js'] == True, "render_heavy_js should be True"
    print("✅ CrawlRequest with render_heavy_js=True works")

    # Test with render_heavy_js=False (default)
    request = CrawlRequest(
        url="https://example.com",
        prompt="Extract data",
        data_schema={"type": "object"}
    )

    data = request.model_dump()
    assert data['render_heavy_js'] == False, "render_heavy_js should default to False"
    print("✅ CrawlRequest with render_heavy_js=False (default) works")

def test_searchscraper_render_heavy_js():
    """Test SearchScraperRequest with render_heavy_js parameter"""
    print("Testing SearchScraperRequest with render_heavy_js...")

    # Test with render_heavy_js=True
    request = SearchScraperRequest(
        user_prompt="What is Python?",
        render_heavy_js=True
    )

    data = request.model_dump()
    assert data['render_heavy_js'] == True, "render_heavy_js should be True"
    print("✅ SearchScraperRequest with render_heavy_js=True works")

    # Test with render_heavy_js=False (default)
    request = SearchScraperRequest(
        user_prompt="What is Python?"
    )

    data = request.model_dump()
    assert data['render_heavy_js'] == False, "render_heavy_js should default to False"
    print("✅ SearchScraperRequest with render_heavy_js=False (default) works")

if __name__ == "__main__":
    print("🧪 Testing render_heavy_js parameter implementation in Python SDK\n")

    try:
        test_smartscraper_render_heavy_js()
        test_scrape_render_heavy_js()
        test_agenticscraper_render_heavy_js()
        test_crawl_render_heavy_js()
        test_searchscraper_render_heavy_js()

        print("\n🎉 All Python SDK tests passed!")

    except Exception as e:
        print(f"\n❌ Test failed: {e}")
        sys.exit(1)
