@@ -146,8 +146,8 @@ void ChatCompletionsImpl(std::unique_ptr<Service>& service,
146146 return ;
147147 }
148148
149- auto call =
150- std::make_shared<ChatCall>( ctrl, guard.release (), req_pb, resp_pb);
149+ auto call = std::make_shared<ChatCall>(
150+ ctrl, guard.release (), req_pb, resp_pb, arena != nullptr /* use_arena */ );
151151 service->process_async (call);
152152}
153153} // namespace
@@ -166,17 +166,18 @@ void APIService::ChatCompletionsHttp(
166166 return ;
167167 }
168168
169- auto arena = response->GetArena ();
170169 auto ctrl = reinterpret_cast <brpc::Controller*>(controller);
171170
172171 if (FLAGS_backend == " llm" ) {
172+ auto arena = response->GetArena ();
173173 CHECK (chat_service_impl_) << " chat service is invalid." ;
174174 ChatCompletionsImpl<ChatCall, ChatServiceImpl>(
175175 chat_service_impl_, done_guard, arena, ctrl);
176176 } else if (FLAGS_backend == " vlm" ) {
177177 CHECK (mm_chat_service_impl_) << " mm chat service is invalid." ;
178+ // TODO: re-enable arena allocation for the vlm backend; passing nullptr as the arena temporarily falls back to heap allocation.
178179 ChatCompletionsImpl<MMChatCall, MMChatServiceImpl>(
179- mm_chat_service_impl_, done_guard, arena , ctrl);
180+ mm_chat_service_impl_, done_guard, nullptr , ctrl);
180181 }
181182}
182183
0 commit comments