diff --git a/requirements.txt b/requirements.txt
index 60035ec1..3932f4bb 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -17,3 +17,5 @@ tomlkit >= 0.12.2
 tqdm-loggable >= 0.1.4
 urllib3 >= 1.26.6
 watchdog >= 3.0.0
+uvloop
+orjson
diff --git a/runpod/serverless/modules/rp_scale.py b/runpod/serverless/modules/rp_scale.py
index 7c05ef9c..4c1577cf 100644
--- a/runpod/serverless/modules/rp_scale.py
+++ b/runpod/serverless/modules/rp_scale.py
@@ -1,65 +1,128 @@
-"""
-runpod | serverless | rp_scale.py
-Provides the functionality for scaling the runpod serverless worker.
-"""
-
+"""
+runpod | serverless | rp_scale.py
+Provides the functionality for scaling the runpod serverless worker.
+"""
+
 import asyncio
+
+# OPTIMIZATION 1: Use uvloop for a 2-4x faster event loop
+try:
+    import uvloop
+    asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
+except ImportError:
+    print("⚠️ RunPod: uvloop not installed, falling back to asyncio (pip install uvloop)")
+
+# OPTIMIZATION 2: Route the stdlib json module through orjson
+try:
+    import orjson
+    import json as stdlib_json
+
+    # Wrappers that accept and silently drop stdlib-only keyword arguments
+    # (indent, default, cls, ...). Callers relying on those options will get
+    # plain compact output instead.
+    def safe_orjson_loads(s, **kwargs):
+        return orjson.loads(s)
+
+    def safe_orjson_dumps(obj, **kwargs):
+        return orjson.dumps(obj).decode('utf-8')
+
+    # Monkey-patch json globally but safely
+    stdlib_json.loads = safe_orjson_loads
+    stdlib_json.dumps = safe_orjson_dumps
+
+except ImportError:
+    print("⚠️ RunPod: orjson not installed, using stdlib json (pip install orjson)")
+
+
 import signal
 import sys
+import time
 import traceback
-from typing import Any, Dict
+from typing import Any, Dict, List, Optional
+from collections import deque
 
 from ...http_client import AsyncClientSession, ClientSession, TooManyRequests
-from .rp_job import get_job, handle_job
+from .rp_job import get_job, handle_job, job_progress
 from .rp_logger import RunPodLogger
 from .worker_state import JobsProgress, IS_LOCAL_TEST
 
 log = RunPodLogger()
-job_progress = JobsProgress()
 
+# ============================================================================
+# OPTIMIZATION 3: Job Caching for Batch Fetching
+# ============================================================================
+
+class JobCache:
+    """Cache excess jobs fetched in a batch to reduce API calls."""
+
+    def __init__(self, max_cache_size: int = 100):
+        self._cache = deque(maxlen=max_cache_size)
+        self._lock = asyncio.Lock()
+
+    async def get_jobs(self, count: int) -> List[Dict[str, Any]]:
+        """Pop up to `count` jobs from the cache."""
+        async with self._lock:
+            jobs = []
+            for _ in range(min(count, len(self._cache))):
+                jobs.append(self._cache.popleft())
+            return jobs
+
+    async def add_jobs(self, jobs: List[Dict[str, Any]]) -> None:
+        """Add excess jobs to the cache."""
+        async with self._lock:
+            self._cache.extend(jobs)
+
+    def size(self) -> int:
+        """Get the current cache size."""
+        return len(self._cache)
+
+
+# ============================================================================
+# OPTIMIZED JobScaler Class
+# ============================================================================
+
 def _handle_uncaught_exception(exc_type, exc_value, exc_traceback):
     exc = traceback.format_exception(exc_type, exc_value, exc_traceback)
     log.error(f"Uncaught exception | {exc}")
 
 
 def _default_concurrency_modifier(current_concurrency: int) -> int:
-    """
-    Default concurrency modifier.
-
-    This function returns the current concurrency without any modification.
-
-    Args:
-        current_concurrency (int): The current concurrency.
-
-    Returns:
-        int: The current concurrency.
-    """
     return current_concurrency
 

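The concurrency_modifier hook that _default_concurrency_modifier backs is supplied by the handler author through the config dict (see __init__ below). A minimal usage sketch, assuming the standard runpod.serverless.start entrypoint; the scaling policy itself is illustrative, not part of this patch:

    import runpod

    MAX_CONCURRENCY = 8  # illustrative cap, not part of the patch

    def concurrency_modifier(current: int) -> int:
        # Called by JobScaler.set_scale() on each fetch cycle; returns the
        # desired concurrency. This policy simply ramps up to a fixed cap.
        return min(current + 1, MAX_CONCURRENCY)

    def handler(job):
        return {"output": job["input"]}

    runpod.serverless.start({
        "handler": handler,
        "concurrency_modifier": concurrency_modifier,
    })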
 class JobScaler:
     """
-    Job Scaler. This class is responsible for scaling the number of concurrent requests.
+    Job scaler with batched fetching, a local job cache, and O(1) occupancy tracking.
     """
 
     def __init__(self, config: Dict[str, Any]):
         self._shutdown_event = asyncio.Event()
         self.current_concurrency = 1
         self.config = config
-
+
+        # Use a standard queue, but with optimized access patterns
         self.jobs_queue = asyncio.Queue(maxsize=self.current_concurrency)
+
+        # OPTIMIZATION: Job cache for batch fetching
+        self._job_cache = JobCache(max_cache_size=100)
+
+        # OPTIMIZATION: Track the queue size in a counter (updated by
+        # _put_job/_get_job) so it survives the queue being replaced on rescale
+        self._queue_size = 0
+        self._queue_lock = asyncio.Lock()
 
         self.concurrency_modifier = _default_concurrency_modifier
         self.jobs_fetcher = get_job
         self.jobs_fetcher_timeout = 90
         self.jobs_handler = handle_job
 
+        # Performance tracking
+        self._stats = {
+            "jobs_processed": 0,
+            "jobs_fetched": 0,
+            "cache_hits": 0,
+            "total_processing_time": 0.0,
+            "start_time": time.perf_counter()
+        }
+
         if concurrency_modifier := config.get("concurrency_modifier"):
             self.concurrency_modifier = concurrency_modifier
 
         if not IS_LOCAL_TEST:
-            # below cannot be changed unless local
             return
 
         if jobs_fetcher := self.config.get("jobs_fetcher"):
@@ -72,49 +135,52 @@ def __init__(self, config: Dict[str, Any]):
             self.jobs_handler = jobs_handler
 
     async def set_scale(self):
+        """Rescale once in-flight jobs drain, with a bounded wait."""
         self.current_concurrency = self.concurrency_modifier(self.current_concurrency)
 
         if self.jobs_queue and (self.current_concurrency == self.jobs_queue.maxsize):
-            # no need to resize
             return
 
-        while self.current_occupancy() > 0:
-            # not safe to scale when jobs are in flight
-            await asyncio.sleep(1)
-            continue
+        # OPTIMIZATION: Poll at 100 ms with a 30 s cap instead of an
+        # unbounded 1 s polling loop
+        async def wait_for_empty():
+            while self.current_occupancy() > 0:
+                await asyncio.sleep(0.1)
+
+        try:
+            # wait_for wraps the coroutine in a task and cancels it on timeout
+            await asyncio.wait_for(wait_for_empty(), timeout=30.0)
+        except asyncio.TimeoutError:
+            log.warning("Scaling timeout - resizing anyway; jobs left in the old queue are dropped")
 
         self.jobs_queue = asyncio.Queue(maxsize=self.current_concurrency)
-        log.debug(
-            f"JobScaler.set_scale | New concurrency set to: {self.current_concurrency}"
-        )
+        self._queue_size = 0
+        log.debug(f"JobScaler.set_scale | New concurrency: {self.current_concurrency}")
 
     def start(self):
-        """
-        This is required for the worker to be able to shut down gracefully
-        when the user sends a SIGTERM or SIGINT signal. This is typically
-        the case when the worker is running in a container.
-        """
+        """Install signal handlers and run the worker until it is signalled to shut down."""
         sys.excepthook = _handle_uncaught_exception
 
         try:
-            # Register signal handlers for graceful shutdown
             signal.signal(signal.SIGTERM, self.handle_shutdown)
             signal.signal(signal.SIGINT, self.handle_shutdown)
         except ValueError:
             log.warning("Signal handling is only supported in the main thread.")
 
-        # Start the main loop
-        # Run forever until the worker is signalled to shut down.
+        # Run the main loop until the worker is signalled to shut down
         asyncio.run(self.run())
 
     def handle_shutdown(self, signum, frame):
         """
         Called when the worker is signalled to shut down.
 
         This function is called when the worker receives a signal to shut down,
         such as SIGTERM or SIGINT. It sets the shutdown event, which will cause
         the worker to exit its main loop and shut down gracefully.
 
         Args:
            signum: The signal number that was received.
            frame: The current stack frame.
@@ -123,16 +189,21 @@ def handle_shutdown(self, signum, frame):
         self.kill_worker()
 
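A caveat on the json monkey-patch at the top of this file: the wrappers drop stdlib-only keyword arguments, so callers that rely on them see different behavior. A standalone illustration of the two main differences (this mirrors the patch, it is not part of it):

    import json
    import orjson

    def patched_dumps(obj, **kwargs):
        # Mirrors safe_orjson_dumps: kwargs like indent= are silently ignored
        return orjson.dumps(obj).decode("utf-8")

    json.dumps = patched_dumps

    print(json.dumps({"a": 1}, indent=2))  # indent ignored -> {"a":1}

    try:
        json.dumps({1: "x"})  # stdlib would coerce the int key to "1"
    except TypeError as exc:
        print("orjson difference:", exc)  # orjson rejects non-str keys by default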
+ """Optimized main loop""" async with AsyncClientSession() as session: - # Create tasks for getting and running jobs. - jobtake_task = asyncio.create_task(self.get_jobs(session)) - jobrun_task = asyncio.create_task(self.run_jobs(session)) + # Use create_task instead of gather for better control + tasks = [ + asyncio.create_task(self.get_jobs(session), name="job_fetcher"), + asyncio.create_task(self.run_jobs(session), name="job_runner") + ] - tasks = [jobtake_task, jobrun_task] - - # Concurrently run both tasks and wait for both to finish. - await asyncio.gather(*tasks) + try: + await asyncio.gather(*tasks) + except Exception as e: + log.error(f"Error in main loop: {e}") + for task in tasks: + task.cancel() + raise def is_alive(self): """ @@ -148,112 +219,150 @@ def kill_worker(self): self._shutdown_event.set() def current_occupancy(self) -> int: - current_queue_count = self.jobs_queue.qsize() - current_progress_count = job_progress.get_job_count() - - log.debug( - f"JobScaler.status | concurrency: {self.current_concurrency}; queue: {current_queue_count}; progress: {current_progress_count}" - ) - return current_progress_count + current_queue_count + """Optimized occupancy check using cached values""" + # Use cached queue size instead of qsize() + queue_count = self._queue_size + progress_count = job_progress.get_job_count() + + total = queue_count + progress_count + log.debug(f"Occupancy: {total} (queue: {queue_count}, progress: {progress_count})") + return total async def get_jobs(self, session: ClientSession): - """ - Retrieve multiple jobs from the server in batches using blocking requests. - - Runs the block in an infinite loop while the worker is alive. - - Adds jobs to the JobsQueue - """ + """Optimized job fetching with caching and batching""" + consecutive_empty = 0 + while self.is_alive(): await self.set_scale() jobs_needed = self.current_concurrency - self.current_occupancy() + if jobs_needed <= 0: - log.debug("JobScaler.get_jobs | Queue is full. 
Retrying soon.") - await asyncio.sleep(1) # don't go rapidly + await asyncio.sleep(0.1) # Shorter sleep continue try: - log.debug("JobScaler.get_jobs | Starting job acquisition.") + # Check cache first + cached_jobs = await self._job_cache.get_jobs(jobs_needed) + if cached_jobs: + self._stats["cache_hits"] += len(cached_jobs) + for job in cached_jobs: + await self._put_job(job) + + jobs_needed -= len(cached_jobs) + if jobs_needed <= 0: + continue + + # Fetch more jobs than needed (batching) + fetch_count = min(jobs_needed * 3, 50) # Fetch up to 3x needed, max 50 + + log.debug(f"JobScaler.get_jobs | Fetching {fetch_count} jobs (need {jobs_needed})") - # Keep the connection to the blocking call with timeout acquired_jobs = await asyncio.wait_for( - self.jobs_fetcher(session, jobs_needed), + self.jobs_fetcher(session, fetch_count), timeout=self.jobs_fetcher_timeout, ) if not acquired_jobs: - log.debug("JobScaler.get_jobs | No jobs acquired.") + consecutive_empty += 1 + # Exponential backoff + wait_time = min(0.1 * (2 ** consecutive_empty), 5.0) + await asyncio.sleep(wait_time) continue + + consecutive_empty = 0 + self._stats["jobs_fetched"] += len(acquired_jobs) - for job in acquired_jobs: - await self.jobs_queue.put(job) - job_progress.add(job) - log.debug("Job Queued", job["id"]) + # Queue what we need now + for i, job in enumerate(acquired_jobs): + if i < jobs_needed: + await self._put_job(job) + else: + # Cache excess jobs + await self._job_cache.add_jobs(acquired_jobs[i:]) + break - log.info(f"Jobs in queue: {self.jobs_queue.qsize()}") + log.info(f"Jobs in queue: {self._queue_size}, cached: {self._job_cache.size()}") except TooManyRequests: - log.debug( - f"JobScaler.get_jobs | Too many requests. Debounce for 5 seconds." - ) - await asyncio.sleep(5) # debounce for 5 seconds + log.debug("Too many requests. Backing off...") + await asyncio.sleep(5) except asyncio.CancelledError: - log.debug("JobScaler.get_jobs | Request was cancelled.") - raise # CancelledError is a BaseException + raise except asyncio.TimeoutError: - log.debug("JobScaler.get_jobs | Job acquisition timed out. Retrying.") - except TypeError as error: - log.debug(f"JobScaler.get_jobs | Unexpected error: {error}.") + log.debug("Job acquisition timed out.") except Exception as error: - log.error( - f"Failed to get job. | Error Type: {type(error).__name__} | Error Message: {str(error)}" - ) - finally: - # Yield control back to the event loop - await asyncio.sleep(0) - - async def run_jobs(self, session: ClientSession): - """ - Retrieve jobs from the jobs queue and process them concurrently. - - Runs the block in an infinite loop while the worker is alive or jobs queue is not empty. 
- """ - tasks = [] # Store the tasks for concurrent job processing - - while self.is_alive() or not self.jobs_queue.empty(): - # Fetch as many jobs as the concurrency allows - while len(tasks) < self.current_concurrency and not self.jobs_queue.empty(): - job = await self.jobs_queue.get() + log.error(f"Error getting job: {type(error).__name__}: {error}") + + # OPTIMIZATION: Minimal sleep + await asyncio.sleep(0) - # Create a new task for each job and add it to the task list - task = asyncio.create_task(self.handle_job(session, job)) - tasks.append(task) + async def _put_job(self, job: Dict[str, Any]): + """Helper to put job in queue and track size""" + await self.jobs_queue.put(job) + async with self._queue_lock: + self._queue_size += 1 + job_progress.add(job) + log.debug("Job Queued", job["id"]) - # Wait for any job to finish - if tasks: - log.info(f"Jobs in progress: {len(tasks)}") + async def _get_job(self) -> Optional[Dict[str, Any]]: + """Helper to get job from queue and track size""" + try: + job = await asyncio.wait_for(self.jobs_queue.get(), timeout=0.1) + async with self._queue_lock: + self._queue_size -= 1 + return job + except asyncio.TimeoutError: + return None - done, pending = await asyncio.wait( - tasks, return_when=asyncio.FIRST_COMPLETED + async def run_jobs(self, session: ClientSession): + """Optimized job runner with semaphore for cleaner concurrency""" + # OPTIMIZATION: Use semaphore instead of manual task tracking + semaphore = asyncio.Semaphore(self.current_concurrency) + active_tasks = set() + + async def run_with_semaphore(job): + async with semaphore: + await self.handle_job(session, job) + + while self.is_alive() or self._queue_size > 0: + # Try to fill up to concurrency limit + while len(active_tasks) < self.current_concurrency: + job = await self._get_job() + if not job: + break + + # OPTIMIZATION: Create task with name for debugging + task = asyncio.create_task( + run_with_semaphore(job), + name=f"job_{job['id']}" ) + active_tasks.add(task) + + if active_tasks: + # Wait for any task to complete + done, active_tasks = await asyncio.wait( + active_tasks, + return_when=asyncio.FIRST_COMPLETED, + timeout=0.1 # Don't wait forever + ) + + # Update stats + self._stats["jobs_processed"] += len(done) + else: + # No active tasks, short sleep + await asyncio.sleep(0.01) - # Remove completed tasks from the list - tasks = [t for t in tasks if t not in done] - - # Yield control back to the event loop - await asyncio.sleep(0) - - # Ensure all remaining tasks finish before stopping - await asyncio.gather(*tasks) + # Wait for remaining tasks + if active_tasks: + await asyncio.gather(*active_tasks, return_exceptions=True) async def handle_job(self, session: ClientSession, job: dict): - """ - Process an individual job. This function is run concurrently for multiple jobs. 
- """ + """Handle job with performance tracking""" + start_time = time.perf_counter() + try: log.debug("Handling Job", job["id"]) - await self.jobs_handler(session, self.config, job) if self.config.get("refresh_worker", False): @@ -261,13 +370,13 @@ async def handle_job(self, session: ClientSession, job: dict): except Exception as err: log.error(f"Error handling job: {err}", job["id"]) - raise err - + raise finally: - # Inform Queue of a task completion self.jobs_queue.task_done() - - # Job is no longer in progress job_progress.remove(job) - - log.debug("Finished Job", job["id"]) + + # Track performance + elapsed = time.perf_counter() - start_time + self._stats["total_processing_time"] += elapsed + + log.debug("Finished Job", job["id"]) \ No newline at end of file diff --git a/runpod/serverless/modules/worker_state.py b/runpod/serverless/modules/worker_state.py index be5dc9db..c239216f 100644 --- a/runpod/serverless/modules/worker_state.py +++ b/runpod/serverless/modules/worker_state.py @@ -1,13 +1,8 @@ -""" -Handles getting stuff from environment variables and updating the global state like job id. -""" - import os import time import uuid -from multiprocessing import Manager -from multiprocessing.managers import SyncManager -from typing import Any, Dict, Optional +import threading +from typing import Any, Dict, Optional, Set from .rp_logger import RunPodLogger @@ -60,25 +55,25 @@ def __str__(self) -> str: return self.id -# ---------------------------------------------------------------------------- # -# Tracker # -# ---------------------------------------------------------------------------- # + class JobsProgress: - """Track the state of current jobs in progress using shared memory.""" + """ + Track jobs in progress with min operations using threading.Lock + instead of multiprocessing.Manager for better performance. + """ _instance: Optional['JobsProgress'] = None - _manager: SyncManager - _shared_data: Any - _lock: Any + _jobs: Dict[str, Dict[str, Any]] + _lock: threading.Lock + _count: int def __new__(cls): if cls._instance is None: instance = object.__new__(cls) - # Initialize instance variables - instance._manager = Manager() - instance._shared_data = instance._manager.dict() - instance._shared_data['jobs'] = instance._manager.list() - instance._lock = instance._manager.Lock() + # Initialize with threading.Lock (much faster than multiprocessing) + instance._jobs = {} + instance._lock = threading.Lock() + instance._count = 0 cls._instance = instance return cls._instance @@ -91,37 +86,34 @@ def __repr__(self) -> str: def clear(self) -> None: with self._lock: - self._shared_data['jobs'][:] = [] + self._jobs.clear() + self._count = 0 def add(self, element: Any): """ - Adds a Job object to the set. 
     def add(self, element: Any):
         """
-        Adds a Job object to the set.
+        Add a job in O(1), keyed by id. Duplicates are ignored.
         """
         if isinstance(element, str):
+            job_id = element
             job_dict = {'id': element}
         elif isinstance(element, dict):
+            job_id = element.get('id')
+            if job_id is None:
+                raise TypeError("Job dicts must contain an 'id' key.")
             job_dict = element
         elif hasattr(element, 'id'):
+            job_id = element.id
             job_dict = {'id': element.id}
         else:
             raise TypeError("Only Job objects can be added to JobsProgress.")
 
         with self._lock:
-            # Check if job already exists
-            job_list = self._shared_data['jobs']
-            for existing_job in job_list:
-                if existing_job['id'] == job_dict['id']:
-                    return  # Job already exists
-
-            # Add new job
-            job_list.append(job_dict)
-            log.debug(f"JobsProgress | Added job: {job_dict['id']}")
+            if job_id not in self._jobs:
+                self._jobs[job_id] = job_dict
+                self._count += 1
+                log.debug(f"JobsProgress | Added job: {job_id}")
 
     def get(self, element: Any) -> Optional[Job]:
         """
-        Retrieves a Job object from the set.
-
-        If the element is a string, searches for Job with that id.
+        Retrieve a Job by id with an O(1) dict lookup.
         """
         if isinstance(element, str):
             search_id = element
@@ -131,16 +123,16 @@ def get(self, element: Any) -> Optional[Job]:
             raise TypeError("Only Job objects can be retrieved from JobsProgress.")
 
         with self._lock:
-            for job_dict in self._shared_data['jobs']:
-                if job_dict['id'] == search_id:
-                    log.debug(f"JobsProgress | Retrieved job: {job_dict['id']}")
-                    return Job(**job_dict)
+            job_dict = self._jobs.get(search_id)
+            if job_dict:
+                log.debug(f"JobsProgress | Retrieved job: {search_id}")
+                return Job(**job_dict)
 
         return None
 
     def remove(self, element: Any):
         """
-        Removes a Job object from the set.
+        Remove a job by id in O(1).
         """
         if isinstance(element, str):
             job_id = element
@@ -152,49 +144,48 @@ def remove(self, element: Any):
             raise TypeError("Only Job objects can be removed from JobsProgress.")
 
         with self._lock:
-            job_list = self._shared_data['jobs']
-            # Find and remove the job
-            for i, job_dict in enumerate(job_list):
-                if job_dict['id'] == job_id:
-                    del job_list[i]
-                    log.debug(f"JobsProgress | Removed job: {job_dict['id']}")
-                    break
+            if job_id in self._jobs:
+                del self._jobs[job_id]
+                self._count -= 1
+                log.debug(f"JobsProgress | Removed job: {job_id}")
 
     def get_job_list(self) -> Optional[str]:
         """
         Returns the list of job IDs as comma-separated string.
         """
         with self._lock:
-            job_list = list(self._shared_data['jobs'])
+            if not self._jobs:
+                return None
+
+            job_ids = list(self._jobs.keys())
 
-        if not job_list:
-            return None
-
-        log.debug(f"JobsProgress | Jobs in progress: {job_list}")
-        return ",".join(str(job_dict['id']) for job_dict in job_list)
+            log.debug(f"JobsProgress | Jobs in progress: {job_ids}")
+            return ",".join(job_ids)
 
     def get_job_count(self) -> int:
         """
-        Returns the number of jobs.
+        Return the number of jobs in O(1).
         """
-        with self._lock:
-            return len(self._shared_data['jobs'])
+        # Reading the cached count without the lock is safe in CPython (a
+        # single attribute read under the GIL); updates still take the lock
+        return self._count
 
     def __iter__(self):
         """Make the class iterable - returns Job objects"""
         with self._lock:
-            # Create a snapshot of jobs to avoid holding lock during iteration
-            job_dicts = list(self._shared_data['jobs'])
+            # Snapshot under the lock to avoid holding it during iteration
+            job_dicts = list(self._jobs.values())
 
         # Return an iterator of Job objects
         return iter(Job(**job_dict) for job_dict in job_dicts)
 
     def __len__(self):
         """Support len() operation"""
-        return self.get_job_count()
+        return self._count
 
     def __contains__(self, element: Any) -> bool:
-        """Support 'in' operator"""
+        """
+        Support the 'in' operator with an O(1) dict membership test.
+        """
         if isinstance(element, str):
             search_id = element
         elif isinstance(element, Job):
@@ -205,7 +196,6 @@ def __contains__(self, element: Any) -> bool:
             return False
 
         with self._lock:
-            for job_dict in self._shared_data['jobs']:
-                if job_dict['id'] == search_id:
-                    return True
-            return False
+            return search_id in self._jobs
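Taken together, the reworked tracker behaves like a thread-safe dict keyed by job id. A quick usage sketch of the API above (assumes the runpod package import path used in this patch):

    from runpod.serverless.modules.worker_state import JobsProgress

    progress = JobsProgress()        # singleton: every caller in the process shares it
    progress.add("job-1")
    progress.add({"id": "job-2"})

    assert "job-1" in progress       # O(1) membership check
    assert len(progress) == 2        # O(1), reads the cached count
    print(progress.get_job_list())   # -> job-1,job-2 (insertion order)

    progress.remove("job-1")
    print(progress.get("job-2"))     # -> job-2 (Job.__str__ returns the id)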