@@ -1,56 +1,45 @@
 from bs4 import BeautifulSoup
 import os
+from scrapegraph_py import ScrapeGraphClient, scrape_text
+from dotenv import load_dotenv
 
-def scrape_local_html(file_path):
+def scrape_local_html(client: ScrapeGraphClient, file_path: str, prompt: str):
     """
-    Scrape content from a local HTML file.
+    Scrape content from a local HTML file using ScrapeGraph AI.
 
     Args:
+        client (ScrapeGraphClient): Initialized ScrapeGraph client
         file_path (str): Path to the local HTML file
+        prompt (str): Natural language prompt describing what to extract
 
     Returns:
-        dict: Extracted data from the HTML file
+        str: Extracted data in JSON format
     """
-    # Check if file exists
     if not os.path.exists(file_path):
         raise FileNotFoundError(f"HTML file not found at: {file_path}")
 
-    # Read the HTML file
     with open(file_path, 'r', encoding='utf-8') as file:
         html_content = file.read()
 
-    # Parse HTML with BeautifulSoup
+    # Use BeautifulSoup to extract text content
     soup = BeautifulSoup(html_content, 'html.parser')
+    text_content = soup.get_text(separator='\n', strip=True)
 
-    # Example extraction - modify based on your HTML structure
-    data = {
-        'title': soup.title.string if soup.title else None,
-        'paragraphs': [p.text for p in soup.find_all('p')],
-        'links': [{'text': a.text, 'href': a.get('href')} for a in soup.find_all('a')],
-        'headers': [h.text for h in soup.find_all(['h1', 'h2', 'h3'])]
-    }
-
-    return data
+    # Use ScrapeGraph AI to analyze the text
+    return scrape_text(client, text_content, prompt)
 
 def main():
-    # Example usage
+    load_dotenv()
+    api_key = os.getenv("SCRAPEGRAPH_API_KEY")
+    client = ScrapeGraphClient(api_key)
+
     try:
-        # Assuming you have a sample.html file in the same directory
-        result = scrape_local_html('sample.html')
-
-        # Print extracted data
-        print("Title:", result['title'])
-        print("\nParagraphs:")
-        for p in result['paragraphs']:
-            print(f"- {p}")
-
-        print("\nLinks:")
-        for link in result['links']:
-            print(f"- {link['text']}: {link['href']}")
-
-        print("\nHeaders:")
-        for header in result['headers']:
-            print(f"- {header}")
+        result = scrape_local_html(
+            client,
+            'sample.html',
+            "Extract main content and important information"
+        )
+        print("Extracted Data:", result)
 
     except FileNotFoundError as e:
         print(f"Error: {e}")
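One gap worth noting: the new main() hands whatever os.getenv("SCRAPEGRAPH_API_KEY") returns straight to ScrapeGraphClient, so a missing .env entry would surface later as an opaque client error, and the diff shows no entry-point guard for the file. A minimal sketch of a hardened launcher, reusing scrape_local_html as defined in this diff; the run() name and the RuntimeError message are illustrative, not part of this PR:

import os

from dotenv import load_dotenv
from scrapegraph_py import ScrapeGraphClient

def run():
    # Load SCRAPEGRAPH_API_KEY from a local .env file, then fail fast
    # with a clear message instead of passing None to the client.
    load_dotenv()
    api_key = os.getenv("SCRAPEGRAPH_API_KEY")
    if not api_key:
        raise RuntimeError("SCRAPEGRAPH_API_KEY is not set; add it to .env")
    client = ScrapeGraphClient(api_key)
    # scrape_local_html is the function defined in this PR's diff,
    # assumed to live in the same module as this sketch.
    result = scrape_local_html(
        client,
        'sample.html',
        "Extract main content and important information"
    )
    print("Extracted Data:", result)

if __name__ == "__main__":
    run()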
|