|
22 | 22 | # See the License for the specific language governing permissions and |
23 | 23 | # limitations under the License. |
24 | 24 | """Inference-only Qwen2-VL model compatible with HuggingFace weights.""" |
25 | | -from array import array |
26 | 25 | from functools import lru_cache, partial |
27 | 26 | from typing import (Iterable, List, Mapping, Optional, Tuple, Type, TypedDict, |
28 | 27 | Union) |
|
66 | 65 | from vllm.multimodal.base import MultiModalData |
67 | 66 | from vllm.multimodal.image import cached_get_image_processor |
68 | 67 | from vllm.platforms import current_platform |
69 | | -from vllm.sequence import (VLLM_TOKEN_ID_ARRAY_TYPE, IntermediateTensors, |
70 | | - SequenceData) |
| 68 | +from vllm.sequence import IntermediateTensors, SequenceData |
71 | 69 | from vllm.transformers_utils.processor import get_processor |
72 | 70 |
|
73 | 71 | logger = init_logger(__name__) |
@@ -681,15 +679,14 @@ def dummy_data_for_qwen2_vl( |
681 | 679 | "--limit-mm-per-prompt.") |
682 | 680 |
|
683 | 681 | hf_config = ctx.get_hf_config(Qwen2VLConfig) |
684 | | - token_ids = array(VLLM_TOKEN_ID_ARRAY_TYPE, |
685 | | - [hf_config.vision_start_token_id]) |
686 | | - token_ids += array(VLLM_TOKEN_ID_ARRAY_TYPE, |
687 | | - [hf_config.image_token_id]) * max_llm_image_tokens |
688 | | - token_ids += array(VLLM_TOKEN_ID_ARRAY_TYPE, |
689 | | - [hf_config.vision_end_token_id]) |
690 | | - token_ids += array(VLLM_TOKEN_ID_ARRAY_TYPE, |
691 | | - [0]) * (seq_len - max_llm_image_tokens - 2) |
692 | | - dummy_seqdata = SequenceData(token_ids) |
| 682 | + |
| 683 | + dummy_seqdata = SequenceData.from_token_counts( |
| 684 | + (hf_config.vision_start_token_id, 1), |
| 685 | + (hf_config.image_token_id, max_llm_image_tokens), |
| 686 | + (hf_config.vision_end_token_id, 1), |
| 687 | + (0, seq_len - max_llm_image_tokens - 2), |
| 688 | + ) |
| 689 | + |
693 | 690 | dummy_image = Image.new("RGB", (max_resized_width, max_resized_height), |
694 | 691 | color=0) |
695 | 692 |
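For context, here is a minimal sketch of what the replacement construction does, assuming `SequenceData.from_token_counts` expands `(token_id, count)` pairs into a flat prompt-token sequence in order. The `from_token_counts` helper below is a hypothetical stand-in written for illustration, not vLLM's actual implementation, and the token ids are illustrative values rather than ones read from the Qwen2-VL config:

```python
# A sketch of the (token_id, count) expansion that the diff switches to,
# under the assumption stated above. Not vLLM's real implementation.
from array import array
from itertools import chain
from typing import Tuple

# vLLM historically stored token ids in a signed-long array ("l").
VLLM_TOKEN_ID_ARRAY_TYPE = "l"


def from_token_counts(*token_counts: Tuple[int, int]) -> array:
    """Expand (token_id, count) pairs into a flat token-id array."""
    return array(
        VLLM_TOKEN_ID_ARRAY_TYPE,
        chain.from_iterable([token_id] * count
                            for token_id, count in token_counts),
    )


# Mirrors the dummy-data layout in the diff: one vision-start token,
# max_llm_image_tokens image placeholders, one vision-end token, then
# zero-padding up to seq_len. Token ids here are illustrative.
vision_start, image_tok, vision_end = 151652, 151655, 151653
max_llm_image_tokens, seq_len = 4, 8
token_ids = from_token_counts(
    (vision_start, 1),
    (image_tok, max_llm_image_tokens),
    (vision_end, 1),
    (0, seq_len - max_llm_image_tokens - 2),
)
assert list(token_ids) == [151652, 151655, 151655, 151655,
                           151655, 151653, 0, 0]
```

Compared with the removed code, this pushes the repetitive `array(...) * count` concatenation behind one call, so `dummy_data_for_qwen2_vl` states only the token layout and no longer needs to import `array` or `VLLM_TOKEN_ID_ARRAY_TYPE` itself.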
|
|