Skip to content

Commit c7e2d65

Browse files
authored
bugfix: temporarily use heap allocation for VLM backend. (#334)
1 parent 516420f commit c7e2d65

File tree

2 files changed

+17
-6
lines changed

2 files changed

+17
-6
lines changed

xllm/api_service/api_service.cpp

100644 → 100755
Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -146,8 +146,8 @@ void ChatCompletionsImpl(std::unique_ptr<Service>& service,
146146
return;
147147
}
148148

149-
auto call =
150-
std::make_shared<ChatCall>(ctrl, guard.release(), req_pb, resp_pb);
149+
auto call = std::make_shared<ChatCall>(
150+
ctrl, guard.release(), req_pb, resp_pb, arena != nullptr /*use_arena*/);
151151
service->process_async(call);
152152
}
153153
} // namespace
@@ -166,17 +166,18 @@ void APIService::ChatCompletionsHttp(
166166
return;
167167
}
168168

169-
auto arena = response->GetArena();
170169
auto ctrl = reinterpret_cast<brpc::Controller*>(controller);
171170

172171
if (FLAGS_backend == "llm") {
172+
auto arena = response->GetArena();
173173
CHECK(chat_service_impl_) << " chat service is invalid.";
174174
ChatCompletionsImpl<ChatCall, ChatServiceImpl>(
175175
chat_service_impl_, done_guard, arena, ctrl);
176176
} else if (FLAGS_backend == "vlm") {
177177
CHECK(mm_chat_service_impl_) << " mm chat service is invalid.";
178+
// TODO: fix me - temporarily using heap allocation instead of arena
178179
ChatCompletionsImpl<MMChatCall, MMChatServiceImpl>(
179-
mm_chat_service_impl_, done_guard, arena, ctrl);
180+
mm_chat_service_impl_, done_guard, nullptr, ctrl);
180181
}
181182
}
182183

xllm/api_service/stream_call.h

100644 → 100755
Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -39,8 +39,13 @@ class StreamCall : public Call {
3939
StreamCall(brpc::Controller* controller,
4040
::google::protobuf::Closure* done,
4141
Request* request,
42-
Response* response)
43-
: Call(controller), done_(done), request_(request), response_(response) {
42+
Response* response,
43+
bool use_arena = true)
44+
: Call(controller),
45+
done_(done),
46+
request_(request),
47+
response_(response),
48+
use_arena_(use_arena) {
4449
stream_ = request_->stream();
4550
if (stream_) {
4651
pa_ = controller_->CreateProgressiveAttachment();
@@ -67,6 +72,10 @@ class StreamCall : public Call {
6772
if (!stream_) {
6873
done_->Run();
6974
}
75+
if (!use_arena_) {
76+
delete request_;
77+
delete response_;
78+
}
7079
}
7180

7281
bool write_and_finish(Response& response) {
@@ -142,6 +151,7 @@ class StreamCall : public Call {
142151
Response* response_;
143152

144153
bool stream_ = false;
154+
bool use_arena_ = true;
145155
butil::intrusive_ptr<brpc::ProgressiveAttachment> pa_;
146156
butil::IOBuf io_buf_;
147157

0 commit comments

Comments
 (0)