@@ -87,7 +87,7 @@ def generate_text(
8787 messages : ModelInput ,
8888 parameters : dict [str , Any ],
8989 ) -> tuple [LazyMessage , PdlLazy [Any ]]:
90- print ("Asynchronous model call started" , file = stderr )
90+ print (f "Asynchronous model call started to { block . model } " , file = stderr )
9191 # global _BACKGROUND_TASKS
9292 future = asyncio .run_coroutine_threadsafe (
9393 LitellmModel .async_generate_text (
@@ -110,24 +110,29 @@ def update_end_nanos(future):
110110 if block .pdl__timing is not None :
111111 block .pdl__timing .end_nanos = time .time_ns ()
112112
113- # report call completion and its duration (and if available queueing time)
114- res = future .result ()[1 ]
113+ # report call completion and its duration
115114 start = (
116115 block .pdl__timing .start_nanos
117116 if block .pdl__timing .start_nanos is not None
118117 else 0
119118 )
120119 exec_nanos = block .pdl__timing .end_nanos - start
121- queue_nanos = 0
122- if "created" in res :
123- queue_nanos = (
124- res ["created" ] * 1000000000 - block .pdl__timing .start_nanos
125- )
126- exec_nanos = exec_nanos - queue_nanos
127120 print (
128- f"Asynchronous model call to { block .model } completed in { (exec_nanos )/ 1000000 } ms queued for { queue_nanos / 1000000 } ms " ,
121+ f"Asynchronous model call to { block .model } completed in { (exec_nanos )/ 1000000 } ms" ,
129122 file = stderr ,
130123 )
124+ msg = future .result ()[0 ]
125+ if msg ["content" ] is not None :
126+ from termcolor import colored
127+
128+ from .pdl_ast import BlockKind
129+ from .pdl_scheduler import color_of
130+
131+ print (
132+ colored (msg ["content" ], color = color_of (BlockKind .MODEL )),
133+ file = stderr ,
134+ )
135+ print ("\n " , file = stderr )
131136
132137 future .add_done_callback (update_end_nanos )
133138
0 commit comments