Commit 26d6f11

Merge branch 'master' into update/docs
2 parents: ae63e21 + 09e5d90

File tree: 26 files changed, +586 −150 lines

.github/workflows/image-pr.yml

Lines changed: 86 additions & 0 deletions

```diff
@@ -0,0 +1,86 @@
+---
+name: 'build container images tests'
+
+on:
+  pull_request:
+
+concurrency:
+  group: ci-${{ github.head_ref || github.ref }}-${{ github.repository }}
+  cancel-in-progress: true
+
+jobs:
+  extras-image-build:
+    uses: ./.github/workflows/image_build.yml
+    with:
+      tag-latest: ${{ matrix.tag-latest }}
+      tag-suffix: ${{ matrix.tag-suffix }}
+      ffmpeg: ${{ matrix.ffmpeg }}
+      image-type: ${{ matrix.image-type }}
+      build-type: ${{ matrix.build-type }}
+      cuda-major-version: ${{ matrix.cuda-major-version }}
+      cuda-minor-version: ${{ matrix.cuda-minor-version }}
+      platforms: ${{ matrix.platforms }}
+      runs-on: ${{ matrix.runs-on }}
+    secrets:
+      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
+      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
+      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
+      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
+    strategy:
+      # Pushing with all jobs in parallel
+      # eats the bandwidth of all the nodes
+      max-parallel: ${{ github.event_name != 'pull_request' && 2 || 4 }}
+      matrix:
+        include:
+          - build-type: ''
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: '-ffmpeg'
+            ffmpeg: 'true'
+            image-type: 'extras'
+            runs-on: 'arc-runner-set'
+          - build-type: 'cublas'
+            cuda-major-version: "12"
+            cuda-minor-version: "1"
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: '-cublas-cuda12-ffmpeg'
+            ffmpeg: 'true'
+            image-type: 'extras'
+            runs-on: 'arc-runner-set'
+  core-image-build:
+    uses: ./.github/workflows/image_build.yml
+    with:
+      tag-latest: ${{ matrix.tag-latest }}
+      tag-suffix: ${{ matrix.tag-suffix }}
+      ffmpeg: ${{ matrix.ffmpeg }}
+      image-type: ${{ matrix.image-type }}
+      build-type: ${{ matrix.build-type }}
+      cuda-major-version: ${{ matrix.cuda-major-version }}
+      cuda-minor-version: ${{ matrix.cuda-minor-version }}
+      platforms: ${{ matrix.platforms }}
+      runs-on: ${{ matrix.runs-on }}
+    secrets:
+      dockerUsername: ${{ secrets.DOCKERHUB_USERNAME }}
+      dockerPassword: ${{ secrets.DOCKERHUB_PASSWORD }}
+      quayUsername: ${{ secrets.LOCALAI_REGISTRY_USERNAME }}
+      quayPassword: ${{ secrets.LOCALAI_REGISTRY_PASSWORD }}
+    strategy:
+      matrix:
+        include:
+          - build-type: ''
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: '-ffmpeg-core'
+            ffmpeg: 'true'
+            image-type: 'core'
+            runs-on: 'ubuntu-latest'
+          - build-type: 'cublas'
+            cuda-major-version: "12"
+            cuda-minor-version: "1"
+            platforms: 'linux/amd64'
+            tag-latest: 'false'
+            tag-suffix: '-cublas-cuda12-ffmpeg-core'
+            ffmpeg: 'true'
+            image-type: 'core'
+            runs-on: 'ubuntu-latest'
```
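Both jobs delegate to a reusable workflow at `./.github/workflows/image_build.yml`, which is not part of this diff. As a hedged sketch of the contract the calls above rely on — input names are taken from the `with:` and `secrets:` blocks, but the types, defaults, and any additional inputs are assumptions — the reusable workflow would declare roughly:

```yaml
# Hypothetical skeleton of .github/workflows/image_build.yml,
# inferred from the inputs and secrets passed by image-pr.yml.
# The real file may declare more inputs, defaults, and jobs.
name: 'reusable image build'

on:
  workflow_call:
    inputs:
      tag-latest:
        type: string
      tag-suffix:
        type: string
      ffmpeg:
        type: string
      image-type:
        type: string
      build-type:
        type: string
      cuda-major-version:
        type: string
      cuda-minor-version:
        type: string
      platforms:
        type: string
      runs-on:
        type: string
    secrets:
      dockerUsername:
        required: false
      dockerPassword:
        required: false
      quayUsername:
        required: false
      quayPassword:
        required: false
```
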
.github/workflows/image.yml

Lines changed: 0 additions & 1 deletion

```diff
@@ -2,7 +2,6 @@
 name: 'build container images'
 
 on:
-  pull_request:
   push:
     branches:
       - master
```

.github/workflows/release.yaml

Lines changed: 15 additions & 3 deletions

```diff
@@ -34,10 +34,22 @@ jobs:
           sudo apt-get update
           sudo apt-get install build-essential ffmpeg
 
+      - name: Cache grpc
+        id: cache-grpc
+        uses: actions/cache@v3
+        with:
+          path: grpc
+          key: ${{ runner.os }}-grpc
+      - name: Build grpc
+        if: steps.cache-grpc.outputs.cache-hit != 'true'
+        run: |
           git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
-          cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
-            -DgRPC_BUILD_TESTS=OFF \
-            ../.. && sudo make -j12 install
+            cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
+              -DgRPC_BUILD_TESTS=OFF \
+              ../.. && sudo make -j12
+      - name: Install gRPC
+        run: |
+          cd grpc && cd cmake/build && sudo make -j12 install
 
       - name: Build
         id: build
```

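These steps cache the `grpc` checkout and build tree under the key `${{ runner.os }}-grpc`, so the expensive `Build grpc` step only runs on a cache miss, while `Install gRPC` always installs from either the fresh or the restored tree; the same pattern is applied in test.yml below. One thing to watch with a static key is that the cache never invalidates when the pinned gRPC version (`v1.58.0`) is bumped. A common refinement — an optional variant, not part of this commit — embeds the version in the key:

```yaml
- name: Cache grpc
  id: cache-grpc
  uses: actions/cache@v3
  with:
    path: grpc
    # Including the pinned version invalidates the cache on upgrades.
    key: ${{ runner.os }}-grpc-v1.58.0
```
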
.github/workflows/test.yml

Lines changed: 15 additions & 4 deletions

```diff
@@ -86,11 +86,22 @@ jobs:
           sudo cp -rfv sources/go-piper/piper-phonemize/pi/lib/. /usr/lib/ && \
           # Pre-build stable diffusion before we install a newer version of abseil (not compatible with stablediffusion-ncn)
           GO_TAGS="stablediffusion tts" GRPC_BACKENDS=backend-assets/grpc/stablediffusion make build
-
+      - name: Cache grpc
+        id: cache-grpc
+        uses: actions/cache@v3
+        with:
+          path: grpc
+          key: ${{ runner.os }}-grpc
+      - name: Build grpc
+        if: steps.cache-grpc.outputs.cache-hit != 'true'
+        run: |
           git clone --recurse-submodules -b v1.58.0 --depth 1 --shallow-submodules https://github.com/grpc/grpc && \
-          cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
-            -DgRPC_BUILD_TESTS=OFF \
-            ../.. && sudo make -j12 install
+            cd grpc && mkdir -p cmake/build && cd cmake/build && cmake -DgRPC_INSTALL=ON \
+              -DgRPC_BUILD_TESTS=OFF \
+              ../.. && sudo make -j12
+      - name: Install gRPC
+        run: |
+          cd grpc && cd cmake/build && sudo make -j12 install
       - name: Test
         run: |
           GO_TAGS="stablediffusion tts" make test
```

api/api.go

Lines changed: 2 additions & 21 deletions

```diff
@@ -5,7 +5,6 @@ import (
 	"errors"
 	"fmt"
 	"os"
-	"path/filepath"
 	"strings"
 
 	config "github.com/go-skynet/LocalAI/api/config"
@@ -17,7 +16,7 @@ import (
 	"github.com/go-skynet/LocalAI/metrics"
 	"github.com/go-skynet/LocalAI/pkg/assets"
 	"github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/utils"
+	"github.com/go-skynet/LocalAI/pkg/startup"
 
 	"github.com/gofiber/fiber/v2"
 	"github.com/gofiber/fiber/v2/middleware/cors"
@@ -38,25 +37,7 @@ func Startup(opts ...options.AppOption) (*options.Option, *config.ConfigLoader,
 	log.Info().Msgf("Starting LocalAI using %d threads, with models path: %s", options.Threads, options.Loader.ModelPath)
 	log.Info().Msgf("LocalAI version: %s", internal.PrintableVersion())
 
-	modelPath := options.Loader.ModelPath
-	if len(options.ModelsURL) > 0 {
-		for _, url := range options.ModelsURL {
-			if utils.LooksLikeURL(url) {
-				// md5 of model name
-				md5Name := utils.MD5(url)
-
-				// check if file exists
-				if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
-					err := utils.DownloadFile(url, filepath.Join(modelPath, md5Name)+".yaml", "", func(fileName, current, total string, percent float64) {
-						utils.DisplayDownloadFunction(fileName, current, total, percent)
-					})
-					if err != nil {
-						log.Error().Msgf("error loading model: %s", err.Error())
-					}
-				}
-			}
-		}
-	}
+	startup.PreloadModelsConfigurations(options.Loader.ModelPath, options.ModelsURL...)
 
 	cl := config.NewConfigLoader()
 	if err := cl.LoadConfigs(options.Loader.ModelPath); err != nil {
```

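The inline download loop moves into a new `pkg/startup` package as `PreloadModelsConfigurations`. The package itself is not shown in this commit; the following is a minimal sketch reconstructed from the removed code, under the assumption that the helpers follow the `pkg/utils` → `pkg/downloader` renames seen elsewhere in this diff and that `utils.MD5` and `utils.DisplayDownloadFunction` stay in `pkg/utils` — treat the exact signatures as assumptions:

```go
package startup

import (
	"errors"
	"os"
	"path/filepath"

	"github.com/go-skynet/LocalAI/pkg/downloader"
	"github.com/go-skynet/LocalAI/pkg/utils"
	"github.com/rs/zerolog/log"
)

// PreloadModelsConfigurations downloads remote model configuration
// files into modelPath, skipping anything already present.
// Sketch reconstructed from the loop removed from api/api.go above;
// the real implementation may differ.
func PreloadModelsConfigurations(modelPath string, models ...string) {
	for _, url := range models {
		if !downloader.LooksLikeURL(url) {
			continue
		}
		// Use the md5 of the URL as a stable local filename.
		md5Name := utils.MD5(url)

		// Only download when the file does not exist yet.
		if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
			err := downloader.DownloadFile(url, filepath.Join(modelPath, md5Name)+".yaml", "",
				func(fileName, current, total string, percent float64) {
					utils.DisplayDownloadFunction(fileName, current, total, percent)
				})
			if err != nil {
				log.Error().Msgf("error loading model: %s", err.Error())
			}
		}
	}
}
```
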
api/api_test.go

Lines changed: 2 additions & 2 deletions

```diff
@@ -16,9 +16,9 @@ import (
 	. "github.com/go-skynet/LocalAI/api"
 	"github.com/go-skynet/LocalAI/api/options"
 	"github.com/go-skynet/LocalAI/metrics"
+	"github.com/go-skynet/LocalAI/pkg/downloader"
 	"github.com/go-skynet/LocalAI/pkg/gallery"
 	"github.com/go-skynet/LocalAI/pkg/model"
-	"github.com/go-skynet/LocalAI/pkg/utils"
 	"github.com/gofiber/fiber/v2"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
@@ -61,7 +61,7 @@ func getModelStatus(url string) (response map[string]interface{}) {
 }
 
 func getModels(url string) (response []gallery.GalleryModel) {
-	utils.GetURI(url, func(url string, i []byte) error {
+	downloader.GetURI(url, func(url string, i []byte) error {
 		// Unmarshal YAML data into a struct
 		return json.Unmarshal(i, &response)
 	})
```

api/config/config.go

Lines changed: 5 additions & 4 deletions

```diff
@@ -9,6 +9,7 @@ import (
 	"strings"
 	"sync"
 
+	"github.com/go-skynet/LocalAI/pkg/downloader"
 	"github.com/go-skynet/LocalAI/pkg/utils"
 	"github.com/rs/zerolog/log"
 	"gopkg.in/yaml.v3"
@@ -300,21 +301,21 @@ func (cm *ConfigLoader) Preload(modelPath string) error {
 		// Create file path
 		filePath := filepath.Join(modelPath, file.Filename)
 
-		if err := utils.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil {
+		if err := downloader.DownloadFile(file.URI, filePath, file.SHA256, status); err != nil {
 			return err
 		}
 	}
 
 	modelURL := config.PredictionOptions.Model
-	modelURL = utils.ConvertURL(modelURL)
+	modelURL = downloader.ConvertURL(modelURL)
 
-	if utils.LooksLikeURL(modelURL) {
+	if downloader.LooksLikeURL(modelURL) {
 		// md5 of model name
 		md5Name := utils.MD5(modelURL)
 
 		// check if file exists
 		if _, err := os.Stat(filepath.Join(modelPath, md5Name)); errors.Is(err, os.ErrNotExist) {
-			err := utils.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status)
+			err := downloader.DownloadFile(modelURL, filepath.Join(modelPath, md5Name), "", status)
 			if err != nil {
 				return err
 			}
```

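Taken together with the api_test.go change, the refactor extracts the URL and download helpers from `pkg/utils` into a dedicated `pkg/downloader` package, while hashing helpers such as `utils.MD5` stay behind. The surface implied by the call sites in this commit, summarized as a sketch — signatures are inferred from usage, not copied from the package source:

```go
// Package downloader (pkg/downloader): API surface implied by the
// call sites in this commit. A sketch, not the actual declarations.
package downloader

type API interface {
	// LooksLikeURL reports whether s should be treated as a remote URI.
	LooksLikeURL(s string) bool
	// ConvertURL rewrites short-hand URIs (e.g. github://owner/repo/path@ref)
	// into directly fetchable URLs.
	ConvertURL(s string) string
	// GetURI fetches uri and hands the raw body to f.
	GetURI(uri string, f func(url string, body []byte) error) error
	// DownloadFile downloads uri to filePath, verifying the optional
	// sha256 and reporting progress through the status callback.
	DownloadFile(uri, filePath, sha256 string, status func(fileName, current, total string, percent float64)) error
}
```
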
docs/content/advanced/_index.en.md

Lines changed: 7 additions & 1 deletion

````diff
@@ -9,7 +9,7 @@ weight = 6
 
 In order to define default prompts, model parameters (such as custom default `top_p` or `top_k`), LocalAI can be configured to serve user-defined models with a set of default parameters and templates.
 
-You can create multiple `yaml` files in the models path or either specify a single YAML configuration file.
+In order to configure a model, you can create multiple `yaml` files in the models path or either specify a single YAML configuration file.
 Consider the following `models` folder in the `example/chatbot-ui`:
 
 ```
@@ -96,6 +96,12 @@ Specifying a `config-file` via CLI allows to declare models in a single file as
 
 See also [chatbot-ui](https://github.com/go-skynet/LocalAI/tree/master/examples/chatbot-ui) as an example on how to use config files.
 
+It is possible to specify a full URL or a short-hand URL to a YAML model configuration file and use it on start with local-ai, for example to use phi-2:
+
+```
+local-ai github://mudler/LocalAI/examples/configurations/phi-2.yaml@master
+```
+
 ### Full config model file reference
 
 ```yaml
````

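The referenced `phi-2.yaml` is an ordinary model configuration file of the kind described in that doc page. For illustration, a minimal sketch of the shape such a file takes — the field values below are hypothetical, not the actual contents of the phi-2 example:

```yaml
# Illustrative model configuration; values are placeholders.
name: phi-2
context_size: 2048
parameters:
  model: huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf
  temperature: 0.2
template:
  chat: chat
  completion: completion
```
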
docs/content/build/_index.en.md

Lines changed: 8 additions & 0 deletions

````diff
@@ -235,6 +235,14 @@ make GRPC_BACKENDS=backend-assets/grpc/llama-cpp build
 
 By default, all the backends are built.
 
+### Specific llama.cpp version
+
+To build with a specific version of llama.cpp, set `CPPLLAMA_VERSION` to the tag or wanted sha:
+
+```
+CPPLLAMA_VERSION=<sha> make build
+```
+
 ### Windows compatibility
 
 Make sure to give enough resources to the running container. See https://github.com/go-skynet/LocalAI/issues/2
````

docs/content/features/GPU-acceleration.md

Lines changed: 10 additions & 2 deletions

````diff
@@ -15,11 +15,19 @@ This section contains instruction on how to use LocalAI with GPU acceleration.
 For accelleration for AMD or Metal HW there are no specific container images, see the [build]({{%relref "build/#acceleration" %}})
 {{% /notice %}}
 
-### CUDA
+### CUDA(NVIDIA) acceleration
 
 Requirement: nvidia-container-toolkit (installation instructions [1](https://www.server-world.info/en/note?os=Ubuntu_22.04&p=nvidia&f=2) [2](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html))
 
-To use CUDA, use the images with the `cublas` tag.
+To check what CUDA version do you need, you can either run `nvidia-smi` or `nvcc --version`.
+
+Alternatively, you can also check nvidia-smi with docker:
+
+```
+docker run --runtime=nvidia --rm nvidia/cuda nvidia-smi
+```
+
+To use CUDA, use the images with the `cublas` tag, for example.
 
 The image list is on [quay](https://quay.io/repository/go-skynet/local-ai?tab=tags):
````

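Tying this to the image tags built by the workflows above, a typical invocation of a CUDA 12 image would look roughly like the following — the tag and volume path are illustrative, so pick the current tag from the quay listing:

```
docker run -ti --rm --gpus all -p 8080:8080 \
  -v $PWD/models:/models \
  quay.io/go-skynet/local-ai:master-cublas-cuda12-ffmpeg-core
```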