File tree: 7 files changed (+7 −7 lines)
Original file line number | Diff line number | Diff line change
47 47     optimize_model=False,
48 48     trust_remote_code=True,
49 49     use_cache=True)
50    - model = model.to('xpu')
   50 + model = model.half().to('xpu')
51 51
52 52 # Load tokenizer
53 53 tokenizer = CodeLlamaTokenizer.from_pretrained(model_path,
Original file line number | Diff line number | Diff line change
47 47     optimize_model=False,
48 48     trust_remote_code=True,
49 49     use_cache=True)
50    - model = model.to('xpu')
   50 + model = model.half().to('xpu')
51 51
52 52 # Load tokenizer
53 53 tokenizer = AutoTokenizer.from_pretrained(model_path,
Original file line number | Diff line number | Diff line change
47 47     load_in_4bit=True,
48 48     trust_remote_code=True,
49 49     use_cache=True)
50    - model = model.to('xpu')
   50 + model = model.half().to('xpu')
51 51
52 52 # Load tokenizer
53 53 tokenizer = AutoTokenizer.from_pretrained(model_path,
Original file line number | Diff line number | Diff line change
50 50 # This will allow the memory-intensive embedding layer to utilize the CPU instead of iGPU.
51 51 model = optimize_model(model)
52 52
53    - model = model.to('xpu')
   53 + model = model.half().to('xpu')
54 54
55 55 # Load tokenizer
56 56 tokenizer = CodeLlamaTokenizer.from_pretrained(model_path, trust_remote_code=True)
Original file line number | Diff line number | Diff line change
46 46     use_cache=True)
47 47 model = optimize_model(model)
48 48
49    - model = model.to('xpu')
   49 + model = model.half().to('xpu')
50 50
51 51 # Load tokenizer
52 52 tokenizer = AutoTokenizer.from_pretrained(model_path,
Original file line number | Diff line number | Diff line change
49 49 # This will allow the memory-intensive embedding layer to utilize the CPU instead of iGPU.
50 50 model = optimize_model(model)
51 51
52    - model = model.to('xpu')
   52 + model = model.half().to('xpu')
53 53
54 54 # Load tokenizer
55 55 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
Original file line number | Diff line number | Diff line change
49 49 # When running LLMs on Intel iGPUs for Windows users, we recommend setting `cpu_embedding=True` in the optimize_model function.
50 50 # This will allow the memory-intensive embedding layer to utilize the CPU instead of iGPU.
51 51 model = optimize_model(model)
52    - model = model.to('xpu')
   52 + model = model.half().to('xpu')
53 53
54 54 # Load tokenizer
55 55 tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
You can’t perform that action at this time.
0 commit comments