Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .env
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@

### Define a list of GRPC Servers for llama-cpp workers to distribute the load
# https://github.com/ggerganov/llama.cpp/pull/6829
# https://github.com/ggerganov/llama.cpp/blob/master/examples/rpc/README.md
# https://github.com/ggerganov/llama.cpp/blob/master/tools/rpc/README.md
# LLAMACPP_GRPC_SERVERS=""

### Enable to run parallel requests
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ BINARY_NAME=local-ai
DETECT_LIBS?=true

# llama.cpp versions
CPPLLAMA_VERSION?=d7a14c42a1883a34a6553cbfe30da1e1b84dfd6a
CPPLLAMA_VERSION?=1d36b3670b285e69e58b9d687c770a2a0a192194

# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
Expand Down
8 changes: 4 additions & 4 deletions backend/cpp/llama/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,8 @@ llama.cpp:
git checkout -b build $(LLAMA_VERSION) && \
git submodule update --init --recursive --depth 1 --single-branch

llama.cpp/examples/grpc-server: llama.cpp
mkdir -p llama.cpp/examples/grpc-server
llama.cpp/tools/grpc-server: llama.cpp
mkdir -p llama.cpp/tools/grpc-server
bash prepare.sh

rebuild:
Expand All @@ -70,13 +70,13 @@ rebuild:

purge:
rm -rf llama.cpp/build
rm -rf llama.cpp/examples/grpc-server
rm -rf llama.cpp/tools/grpc-server
rm -rf grpc-server

clean: purge
rm -rf llama.cpp

grpc-server: llama.cpp llama.cpp/examples/grpc-server
grpc-server: llama.cpp llama.cpp/tools/grpc-server
@echo "Building grpc-server with $(BUILD_TYPE) build type and $(CMAKE_ARGS)"
ifneq (,$(findstring sycl,$(BUILD_TYPE)))
+bash -c "source $(ONEAPI_VARS); \
Expand Down
2 changes: 1 addition & 1 deletion backend/cpp/llama/grpc-server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ struct server_params
{
std::string hostname = "127.0.0.1";
std::vector<std::string> api_keys;
std::string public_path = "examples/server/public";
std::string public_path = "tools/server/public";
std::string chat_template = "";
int32_t port = 8080;
int32_t read_timeout = 600;
Expand Down
6 changes: 3 additions & 3 deletions backend/cpp/llama/patches/01-llava.patch
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
diff --git a/tools/llava/clip.cpp b/tools/llava/clip.cpp
index 3cd0d2fa..6c5e811a 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
--- a/tools/llava/clip.cpp
+++ b/tools/llava/clip.cpp
@@ -2608,7 +2608,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
struct ggml_tensor * patches = ggml_graph_get_tensor(gf, "patches");
int* patches_data = (int*)malloc(ggml_nbytes(patches));
Expand Down
24 changes: 12 additions & 12 deletions backend/cpp/llama/prepare.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,22 @@ for patch in $(ls patches); do
patch -d llama.cpp/ -p1 < patches/$patch
done

cp -r CMakeLists.txt llama.cpp/examples/grpc-server/
cp -r grpc-server.cpp llama.cpp/examples/grpc-server/
cp -rfv json.hpp llama.cpp/examples/grpc-server/
cp -rfv utils.hpp llama.cpp/examples/grpc-server/
cp -r CMakeLists.txt llama.cpp/tools/grpc-server/
cp -r grpc-server.cpp llama.cpp/tools/grpc-server/
cp -rfv json.hpp llama.cpp/tools/grpc-server/
cp -rfv utils.hpp llama.cpp/tools/grpc-server/

if grep -q "grpc-server" llama.cpp/examples/CMakeLists.txt; then
if grep -q "grpc-server" llama.cpp/tools/CMakeLists.txt; then
echo "grpc-server already added"
else
echo "add_subdirectory(grpc-server)" >> llama.cpp/examples/CMakeLists.txt
echo "add_subdirectory(grpc-server)" >> llama.cpp/tools/CMakeLists.txt
fi

## XXX: In some versions of CMake clip wasn't being built before llama.
## This is a hack for now, but it should be fixed in the future.
cp -rfv llama.cpp/examples/llava/clip.h llama.cpp/examples/grpc-server/clip.h
cp -rfv llama.cpp/examples/llava/clip-impl.h llama.cpp/examples/grpc-server/clip-impl.h
cp -rfv llama.cpp/examples/llava/llava.cpp llama.cpp/examples/grpc-server/llava.cpp
echo '#include "llama.h"' > llama.cpp/examples/grpc-server/llava.h
cat llama.cpp/examples/llava/llava.h >> llama.cpp/examples/grpc-server/llava.h
cp -rfv llama.cpp/examples/llava/clip.cpp llama.cpp/examples/grpc-server/clip.cpp
cp -rfv llama.cpp/tools/llava/clip.h llama.cpp/tools/grpc-server/clip.h
cp -rfv llama.cpp/tools/llava/clip-impl.h llama.cpp/tools/grpc-server/clip-impl.h
cp -rfv llama.cpp/tools/llava/llava.cpp llama.cpp/tools/grpc-server/llava.cpp
echo '#include "llama.h"' > llama.cpp/tools/grpc-server/llava.h
cat llama.cpp/tools/llava/llava.h >> llama.cpp/tools/grpc-server/llava.h
cp -rfv llama.cpp/tools/llava/clip.cpp llama.cpp/tools/grpc-server/clip.cpp
2 changes: 1 addition & 1 deletion backend/cpp/llama/utils.hpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// https://github.com/ggerganov/llama.cpp/blob/master/examples/server/utils.hpp
// https://github.com/ggerganov/llama.cpp/blob/master/tools/server/utils.hpp

#pragma once

Expand Down
Loading