@@ -14,6 +14,7 @@
 # limitations under the License.


+import json
 import os
 import sqlite3
 import threading
@@ -67,7 +68,7 @@ def get_token_usage_from_cache_db(cache_db_path: str | Path) -> dict:
6768 "total_cached_requests" : row [3 ],
6869 }
6970 except Exception as e :
70- logger .warning (f "Failed to read token usage from cache: { e } " )
71+ logger .warning ("Failed to read token usage from cache" , error = str ( e ) )
7172
7273 return {}
7374
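A note on the logging change in this hunk (and the matching ones below): passing `error=str(e)` as a keyword argument assumes a structured logger such as structlog; the stdlib `logging` module would raise a `TypeError` on the extra keyword. A minimal sketch of the assumed API, with an illustrative simulated error:

    import sqlite3

    import structlog  # assumption: the project logger is structlog-like

    logger = structlog.get_logger()

    try:
        raise sqlite3.OperationalError("no such table: responses")
    except Exception as e:
        # Keyword arguments become structured log fields rather than being
        # interpolated into the message string.
        logger.warning("Failed to read token usage from cache", error=str(e))
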
@@ -81,8 +82,126 @@ def get_token_usage_from_cache(cache_dir: str) -> dict:
     return get_token_usage_from_cache_db(cache_db_path)


+def aggregate_runtime_metrics(output_dir: str) -> dict[str, Any]:
+    """Aggregate all run data from the run_times directory."""
+    run_times_dir = Path(output_dir) / "run_times"
+    aggregated_metrics = {}
+
+    if not run_times_dir.exists():
+        return aggregated_metrics
+
+    total_runtime = 0
+    earliest_start = None
+    latest_end = None
+    max_peak_memory = 0
+    max_peak_tree_memory = 0
+    run_count = 0
+
+    for run_file in run_times_dir.glob("runtime_*.json"):
+        try:
+            with open(run_file, "r") as f:
+                run_data = json.load(f)
+            total_runtime += run_data.get("runtime_seconds", 0)
+            run_count += 1
+
+            # Track the earliest start and the latest end
+            run_start = run_data.get("start_time", "")
+            run_end = run_data.get("end_time", "")
+            if earliest_start is None or run_start < earliest_start:
+                earliest_start = run_start
+            if latest_end is None or run_end > latest_end:
+                latest_end = run_end
+
+            # Track peak memory across all runs
+            max_peak_memory = max(
+                max_peak_memory, run_data.get("peak_memory_bytes", 0)
+            )
+            max_peak_tree_memory = max(
+                max_peak_tree_memory, run_data.get("peak_tree_memory_bytes", 0)
+            )
+        except Exception:
+            pass  # Skip unreadable or malformed run files
+
+    if run_count > 0:
+        aggregated_metrics = {
+            "runtime_seconds": total_runtime,
+            "start_time": earliest_start,
+            "end_time": latest_end,
+            "peak_memory_bytes": max_peak_memory,
+            "peak_tree_memory_bytes": max_peak_tree_memory,
+            "total_runs": run_count,
+        }
+
+    # Try to get the inference time from response stats and derive the scoring time
+    try:
+        metrics_file = Path(output_dir) / "eval_factory_metrics.json"
+        if metrics_file.exists():
+            with open(metrics_file, "r") as f:
+                metrics_data = json.load(f)
+            response_stats = metrics_data.get("response_stats", {})
+            inference_time = response_stats.get("inference_time", 0.0)
+
+            # Scoring time is the total runtime minus the inference time
+            scoring_time = max(0.0, total_runtime - inference_time)
+            aggregated_metrics["inference_time_seconds"] = inference_time
+            aggregated_metrics["scoring_time_seconds"] = scoring_time
+    except Exception as e:
+        # If we can't read response stats, just continue without scoring time
+        logger.warning(
+            "Could not extract inference time from response stats", error=str(e)
+        )
+
+    return aggregated_metrics
+
+
+def _update_persistent_metrics(
+    output_dir: str,
+    start_time: float,
+    peak_memory: int,
+    peak_tree_memory: int,
+    run_id: str,
+) -> None:
+    """Save individual run data and update peak memory only."""
+    try:
+        # Create the run_times directory
+        run_times_dir = Path(output_dir) / "run_times"
+        run_times_dir.mkdir(exist_ok=True)
+
+        # Save this individual run's runtime
+        current_time = time.time()
+        current_runtime = current_time - start_time
+        run_file = run_times_dir / f"runtime_{run_id}.json"
+
+        with open(run_file, "w") as f:
+            json.dump(
+                {
+                    "run_id": run_id,
+                    "start_time": time.strftime(
+                        "%Y-%m-%dT%H:%M:%S.%fZ", time.gmtime(start_time)
+                    ),
+                    "end_time": time.strftime(
+                        "%Y-%m-%dT%H:%M:%S.%fZ", time.gmtime(current_time)
+                    ),
+                    "runtime_seconds": current_runtime,
+                    "peak_memory_bytes": peak_memory,
+                    "peak_tree_memory_bytes": peak_tree_memory,
+                },
+                f,
+            )
+
+    except Exception as e:
+        logger.warning(
+            "Failed to update persistent metrics", error=str(e), run_id=run_id
+        )
+
+
 def monitor_memory_usage(
-    func, *args, interval_ms, cache_dir: str | None = None, **kwargs
+    func,
+    *args,
+    interval_ms,
+    cache_dir: str | None = None,
+    output_dir: str | None = None,
+    **kwargs,
 ) -> tuple[EvaluationResult, dict[str, Any]]:
     """
     Run func(*args, **kwargs) while polling RSS via psutil.
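For reference, a sketch of how the two new helpers fit together, assuming this module's functions are importable; the directory name and all field values are illustrative, mirroring what `_update_persistent_metrics` writes:

    import json
    from pathlib import Path

    # Fabricate one checkpoint file of the kind _update_persistent_metrics
    # writes (hypothetical values).
    run_times = Path("results") / "run_times"
    run_times.mkdir(parents=True, exist_ok=True)
    (run_times / "runtime_0.json").write_text(
        json.dumps(
            {
                "run_id": "0",
                "start_time": "2024-01-01T00:00:00Z",
                "end_time": "2024-01-01T00:05:00Z",
                "runtime_seconds": 300.0,
                "peak_memory_bytes": 1_073_741_824,
                "peak_tree_memory_bytes": 2_147_483_648,
            }
        )
    )

    # With no eval_factory_metrics.json present, aggregation returns the
    # summed runtime, earliest start / latest end, max peaks, and run count.
    print(aggregate_runtime_metrics("results"))
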
@@ -91,8 +210,21 @@ def monitor_memory_usage(
     - peak_tree_rss_bytes: peak memory usage of the entire process tree (main + children)
     """
     proc = psutil.Process(os.getpid())
+
+    # Generate a run ID by counting the existing run files
+    if output_dir:
+        run_times_dir = Path(output_dir) / "run_times"
+        run_times_dir.mkdir(exist_ok=True)
+        # Count existing runs to get the next ID
+        existing_runs = list(run_times_dir.glob("runtime_*.json"))
+        run_id = str(len(existing_runs))
+    else:
+        run_id = "0"
+
+    # Initialize values
     peak = 0
     peak_tree = 0
+
     stop = False
     ret = None
@@ -111,6 +243,9 @@ def get_tree_memory(process):

     def sampler():
         nonlocal peak, peak_tree
+        last_save_time = 0
+        save_interval = 5.0  # Save every 5 seconds
+
         while not stop:
             # Get memory for the current process
             rss = proc.memory_info().rss
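The hunk header above references `get_tree_memory`, whose body lies outside this diff. A plausible implementation based on its usage here (an assumption, not the file's actual code):

    import psutil

    def get_tree_memory(process: psutil.Process) -> int:
        """Sum the RSS of a process and all of its descendants."""
        total = process.memory_info().rss
        for child in process.children(recursive=True):
            try:
                total += child.memory_info().rss
            except psutil.NoSuchProcess:
                # A child may exit between enumeration and measurement.
                continue
        return total
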
@@ -120,6 +255,15 @@ def sampler():
             tree_rss = get_tree_memory(proc)
             peak_tree = max(peak_tree, tree_rss)

+            # Checkpoint persistent metrics if output_dir is set and enough time has passed
+            if output_dir:
+                current_time = time.time()
+                if current_time - last_save_time >= save_interval:
+                    _update_persistent_metrics(
+                        output_dir, start_time, peak, peak_tree, run_id
+                    )
+                    last_save_time = current_time
+
             time.sleep(interval_ms / 1000.0)

     th = threading.Thread(target=sampler, daemon=True)
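Putting it together, a hedged usage sketch of the extended entry point (the callable and paths are illustrative; `output_dir` enables the periodic `runtime_<id>.json` checkpoints, which survive a crash and are later merged by `aggregate_runtime_metrics`):

    def run_eval():
        # Stand-in for the real evaluation callable.
        return "ok"

    result, metrics = monitor_memory_usage(
        run_eval,
        interval_ms=500,        # RSS poll period
        cache_dir="cache",      # optional: enables token-usage lookup
        output_dir="results",   # optional: enables periodic checkpoints
    )
    print(metrics["peak_tree_memory_bytes"])
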
@@ -144,15 +288,15 @@ def sampler():
     try:
         token_usage = get_token_usage_from_cache(cache_dir)
     except Exception as e:
-        logger.warning(f"Failed to get token usage from cache: {e}")
+        logger.warning("Failed to get token usage from cache", error=str(e))

     metrics = {
         "runtime_seconds": runtime_seconds,
         "start_time": time.strftime("%Y-%m-%dT%H:%M:%S.%fZ", time.gmtime(start_time)),
         "end_time": time.strftime("%Y-%m-%dT%H:%M:%S.%fZ", time.gmtime(end_time)),
         "token_usage": token_usage,
-        "peak_memory_bytes": peak,  # Memory of main process
-        "peak_tree_memory_bytes": peak_tree,  # Memory of entire process tree
+        "peak_memory_bytes": peak,
+        "peak_tree_memory_bytes": peak_tree,
     }

     return ret, metrics