
Commit 4909aa6 (parent: 0870bf5)

feat: improve RAM estimation by using values from summary

Signed-off-by: Ettore Di Giacinto <[email protected]>

File tree: 1 file changed, +21 -13 lines

pkg/xsysinfo/gguf.go (21 additions, 13 deletions)
@@ -1,8 +1,6 @@
 package xsysinfo
 
 import (
-	"errors"
-
 	gguf "github.com/gpustack/gguf-parser-go"
 )
 
@@ -18,35 +16,45 @@ type VRAMEstimate struct {
 func EstimateGGUFVRAMUsage(f *gguf.GGUFFile, availableVRAM uint64) (*VRAMEstimate, error) {
 	// Get model metadata
 	m := f.Metadata()
-	a := f.Architecture()
+
+	estimate := f.EstimateLLaMACppRun()
+
+	lmes := estimate.SummarizeItem(true, 0, 0)
+	estimatedVRAM := uint64(0)
+	availableLayers := lmes.OffloadLayers // TODO: check if we can just use OffloadLayers here
+
+	for _, vram := range lmes.VRAMs {
+		estimatedVRAM += uint64(vram.NonUMA)
+	}
 
 	// Calculate base model size
 	modelSize := uint64(m.Size)
 
-	if a.BlockCount == 0 {
-		return nil, errors.New("block count is 0")
+	if availableLayers == 0 {
+		availableLayers = 1
+	}
+
+	if estimatedVRAM == 0 {
+		estimatedVRAM = 1
 	}
 
 	// Estimate number of layers that can fit in VRAM
 	// Each layer typically requires about 1/32 of the model size
-	layerSize := modelSize / uint64(a.BlockCount)
-	estimatedLayers := int(availableVRAM / layerSize)
+	layerSize := estimatedVRAM / availableLayers
 
-	// If we can't fit even one layer, we need to do full offload
-	isFullOffload := estimatedLayers <= 0
-	if isFullOffload {
-		estimatedLayers = 0
+	estimatedLayers := int(availableVRAM / layerSize)
+	if availableVRAM > estimatedVRAM {
+		estimatedLayers = int(availableLayers)
 	}
 
 	// Calculate estimated VRAM usage
-	estimatedVRAM := uint64(estimatedLayers) * layerSize
 
 	return &VRAMEstimate{
 		TotalVRAM:       availableVRAM,
 		AvailableVRAM:   availableVRAM,
 		ModelSize:       modelSize,
 		EstimatedLayers: estimatedLayers,
 		EstimatedVRAM:   estimatedVRAM,
-		IsFullOffload:   isFullOffload,
+		IsFullOffload:   availableVRAM > estimatedVRAM,
 	}, nil
 }
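
For context, a minimal usage sketch (not part of this commit) showing how the updated estimator might be called. It assumes the LocalAI module path github.com/mudler/LocalAI for the xsysinfo package, gguf.ParseGGUFFile from gpustack/gguf-parser-go, and a placeholder model path and VRAM figure; only the VRAMEstimate fields visible in the diff are used.

package main

import (
	"fmt"
	"log"

	gguf "github.com/gpustack/gguf-parser-go"

	// Assumed import path for the package changed in this commit.
	"github.com/mudler/LocalAI/pkg/xsysinfo"
)

func main() {
	// Parse a local GGUF model file (path is a placeholder).
	f, err := gguf.ParseGGUFFile("/models/model.gguf")
	if err != nil {
		log.Fatal(err)
	}

	// Estimate against 8 GiB of available VRAM (placeholder value).
	availableVRAM := uint64(8) * 1024 * 1024 * 1024
	est, err := xsysinfo.EstimateGGUFVRAMUsage(f, availableVRAM)
	if err != nil {
		log.Fatal(err)
	}

	fmt.Printf("model size: %d bytes\n", est.ModelSize)
	fmt.Printf("estimated VRAM: %d bytes for %d offloaded layers\n", est.EstimatedVRAM, est.EstimatedLayers)
	fmt.Printf("full offload possible: %v\n", est.IsFullOffload)
}

With this change the per-layer size comes from the gguf-parser summary (sum of non-UMA VRAM across devices divided by the offloadable layer count) rather than from modelSize / BlockCount, and full offload is assumed whenever the available VRAM exceeds the summarized estimate.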
