5656 PoolingChatRequest ,
5757 PoolingCompletionRequest ,
5858 PoolingRequest , PoolingResponse ,
59+ RerankRequest , RerankResponse ,
5960 ScoreRequest , ScoreResponse ,
6061 TokenizeRequest ,
6162 TokenizeResponse ,
6869from vllm .entrypoints .openai .serving_models import (BaseModelPath ,
6970 OpenAIServingModels )
7071from vllm .entrypoints .openai .serving_pooling import OpenAIServingPooling
72+ from vllm .entrypoints .openai .serving_rerank import JinaAIServingRerank
7173from vllm .entrypoints .openai .serving_score import OpenAIServingScores
7274from vllm .entrypoints .openai .serving_tokenization import (
7375 OpenAIServingTokenization )
@@ -306,6 +308,10 @@ def score(request: Request) -> Optional[OpenAIServingScores]:
306308 return request .app .state .openai_serving_scores
307309
308310
def rerank(request: Request) -> Optional[JinaAIServingRerank]:
    """Fetch the rerank serving handler from the application state.

    The return annotation allows ``None``, so callers should check for it
    before using the handler.
    """
    app_state = request.app.state
    return app_state.jinaai_serving_reranking
313+
314+
def tokenization(request: Request) -> OpenAIServingTokenization:
    """Fetch the tokenization serving handler from the application state."""
    app_state = request.app.state
    return app_state.openai_serving_tokenization
311317
@@ -502,6 +508,40 @@ async def create_score_v1(request: ScoreRequest, raw_request: Request):
502508 return await create_score (request , raw_request )
503509
504510
@router.post("/rerank")
@with_cancellation
async def do_rerank(request: RerankRequest, raw_request: Request):
    """Handle a rerank request posted to `/rerank`.

    Looks up the rerank handler via `rerank(raw_request)` and delegates the
    request to it. When no handler is available, an error response is built
    through `base(raw_request)`; otherwise the handler's result is returned
    as a JSON response.
    """
    serving_rerank = rerank(raw_request)
    if serving_rerank is None:
        return base(raw_request).create_error_response(
            message="The model does not support Rerank (Score) API")

    result = await serving_rerank.do_rerank(request, raw_request)

    # An error result supplies the HTTP status code for the response.
    if isinstance(result, ErrorResponse):
        return JSONResponse(content=result.model_dump(),
                            status_code=result.code)
    if isinstance(result, RerankResponse):
        return JSONResponse(content=result.model_dump())

    # Reaching this point means the handler returned an unexpected type.
    assert_never(result)
526+
527+
@router.post("/v1/rerank")
@with_cancellation
async def do_rerank_v1(request: RerankRequest, raw_request: Request):
    """Serve rerank requests on `/v1/rerank`, pointing clients to `/rerank`.

    Logs a warning on every call and then delegates to `do_rerank`, whose
    result is returned unchanged.
    """
    # Adjacent string literals are concatenated verbatim, so every fragment
    # must carry its own separating space (the last one previously did not,
    # producing "clientaccordingly").
    logger.warning(
        "To indicate that the rerank API is not part of the standard OpenAI"
        " API, we have located it at `/rerank`. Please update your client"
        " accordingly. (Note: Conforms to JinaAI rerank API)")

    return await do_rerank(request, raw_request)
537+
538+
@router.post("/v2/rerank")
@with_cancellation
async def do_rerank_v2(request: RerankRequest, raw_request: Request):
    """Serve rerank requests on `/v2/rerank` by delegating to `do_rerank`."""
    response = await do_rerank(request, raw_request)
    return response
543+
544+
505545TASK_HANDLERS : Dict [str , Dict [str , tuple ]] = {
506546 "generate" : {
507547 "messages" : (ChatCompletionRequest , create_chat_completion ),
@@ -512,7 +552,10 @@ async def create_score_v1(request: ScoreRequest, raw_request: Request):
512552 "default" : (EmbeddingCompletionRequest , create_embedding ),
513553 },
514554 "score" : {
515- "default" : (ScoreRequest , create_score ),
555+ "default" : (RerankRequest , do_rerank )
556+ },
557+ "rerank" : {
558+ "default" : (RerankRequest , do_rerank )
516559 },
517560 "reward" : {
518561 "messages" : (PoolingChatRequest , create_pooling ),
@@ -759,6 +802,12 @@ async def init_app_state(
759802 state .openai_serving_models ,
760803 request_logger = request_logger
761804 ) if model_config .task == "score" else None
805+ state .jinaai_serving_reranking = JinaAIServingRerank (
806+ engine_client ,
807+ model_config ,
808+ state .openai_serving_models ,
809+ request_logger = request_logger
810+ ) if model_config .task == "score" else None
762811 state .openai_serving_tokenization = OpenAIServingTokenization (
763812 engine_client ,
764813 model_config ,
0 commit comments